OLD | NEW |
| (Empty) |
1 #!/usr/bin/env perl | |
2 # | |
3 # ==================================================================== | |
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
5 # project. | |
6 # | |
7 # Rights for redistribution and usage in source and binary forms are | |
8 # granted according to the OpenSSL license. Warranty of any kind is | |
9 # disclaimed. | |
10 # ==================================================================== | |
11 | |
12 | |
13 # July 1999 | |
14 # | |
15 # This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. | |
16 # | |
17 # The module is designed to work with either of the "new" MIPS ABI(5), | |
18 # namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | |
19 # IRIX 5.x not only because it doesn't support new ABIs but also | |
20 # because 5.x kernels put R4x00 CPU into 32-bit mode and all those | |
21 # 64-bit instructions (daddu, dmultu, etc.) found below gonna only | |
22 # cause illegal instruction exception:-( | |
23 # | |
24 # In addition the code depends on preprocessor flags set up by MIPSpro | |
25 # compiler driver (either as or cc) and therefore (probably?) can't be | |
26 # compiled by the GNU assembler. GNU C driver manages fine though... | |
27 # I mean as long as -mmips-as is specified or is the default option, | |
28 # because then it simply invokes /usr/bin/as which in turn takes | |
29 # perfect care of the preprocessor definitions. Another neat feature | |
30 # offered by the MIPSpro assembler is an optimization pass. This gave | |
31 # me the opportunity to have the code looking more regular as all those | |
32 # architecture dependent instruction rescheduling details were left to | |
33 # the assembler. Cool, huh? | |
34 # | |
35 # Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | |
36 # goes way over 3 times faster! | |
37 # | |
38 # <appro@fy.chalmers.se> | |
39 | |
40 # October 2010 | |
41 # | |
42 # Adapt the module even for 32-bit ABIs and other OSes. The former was | |
43 # achieved by mechanical replacement of 64-bit arithmetic instructions | |
44 # such as dmultu, daddu, etc. with their 32-bit counterparts and | |
45 # adjusting offsets denoting multiples of BN_ULONG. Above mentioned | |
46 # >3x performance improvement naturally does not apply to 32-bit code | |
47 # [because there is no instruction 32-bit compiler can't use], one | |
48 # has to contend with 40-85% improvement depending on benchmark and | |
49 # key length, more for longer keys. | |
50 | |
51 $flavour = shift; | |
52 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | |
53 open STDOUT,">$output"; | |
54 | |
55 if ($flavour =~ /64|n32/i) { | |
56 $LD="ld"; | |
57 $ST="sd"; | |
58 $MULTU="dmultu"; | |
59 $DIVU="ddivu"; | |
60 $ADDU="daddu"; | |
61 $SUBU="dsubu"; | |
62 $SRL="dsrl"; | |
63 $SLL="dsll"; | |
64 $BNSZ=8; | |
65 $PTR_ADD="daddu"; | |
66 $PTR_SUB="dsubu"; | |
67 $SZREG=8; | |
68 $REG_S="sd"; | |
69 $REG_L="ld"; | |
70 } else { | |
71 $LD="lw"; | |
72 $ST="sw"; | |
73 $MULTU="multu"; | |
74 $DIVU="divu"; | |
75 $ADDU="addu"; | |
76 $SUBU="subu"; | |
77 $SRL="srl"; | |
78 $SLL="sll"; | |
79 $BNSZ=4; | |
80 $PTR_ADD="addu"; | |
81 $PTR_SUB="subu"; | |
82 $SZREG=4; | |
83 $REG_S="sw"; | |
84 $REG_L="lw"; | |
85 $code=".set mips2\n"; | |
86 } | |
87 | |
88 # Below is N32/64 register layout used in the original module. | |
89 # | |
90 ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); | |
91 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); | |
92 ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); | |
93 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); | |
94 ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); | |
95 ($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7); | |
96 # | |
97 # No special adaptation is required for O32. NUBI on the other hand | |
98 # is treated by saving/restoring ($v1,$t0..$t3). | |
99 | |
100 $gp=$v1 if ($flavour =~ /nubi/i); | |
101 | |
102 $minus4=$v1; | |
103 | |
104 $code.=<<___; | |
105 .rdata | |
106 .asciiz "mips3.s, Version 1.2" | |
107 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>" | |
108 | |
109 .text | |
110 .set noat | |
111 | |
112 .align 5 | |
113 .globl bn_mul_add_words | |
114 .ent bn_mul_add_words | |
115 bn_mul_add_words: | |
116 .set noreorder | |
117 bgtz $a2,bn_mul_add_words_internal | |
118 move $v0,$zero | |
119 jr $ra | |
120 move $a0,$v0 | |
121 .end bn_mul_add_words | |
122 | |
123 .align 5 | |
124 .ent bn_mul_add_words_internal | |
125 bn_mul_add_words_internal: | |
126 ___ | |
127 $code.=<<___ if ($flavour =~ /nubi/i); | |
128 .frame $sp,6*$SZREG,$ra | |
129 .mask 0x8000f008,-$SZREG | |
130 .set noreorder | |
131 $PTR_SUB $sp,6*$SZREG | |
132 $REG_S $ra,5*$SZREG($sp) | |
133 $REG_S $t3,4*$SZREG($sp) | |
134 $REG_S $t2,3*$SZREG($sp) | |
135 $REG_S $t1,2*$SZREG($sp) | |
136 $REG_S $t0,1*$SZREG($sp) | |
137 $REG_S $gp,0*$SZREG($sp) | |
138 ___ | |
139 $code.=<<___; | |
140 .set reorder | |
141 li $minus4,-4 | |
142 and $ta0,$a2,$minus4 | |
143 $LD $t0,0($a1) | |
144 beqz $ta0,.L_bn_mul_add_words_tail | |
145 | |
146 .L_bn_mul_add_words_loop: | |
147 $MULTU $t0,$a3 | |
148 $LD $t1,0($a0) | |
149 $LD $t2,$BNSZ($a1) | |
150 $LD $t3,$BNSZ($a0) | |
151 $LD $ta0,2*$BNSZ($a1) | |
152 $LD $ta1,2*$BNSZ($a0) | |
153 $ADDU $t1,$v0 | |
154 sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit | |
155 # values", but it seems to work fine | |
156 # even on 64-bit registers. | |
157 mflo $at | |
158 mfhi $t0 | |
159 $ADDU $t1,$at | |
160 $ADDU $v0,$t0 | |
161 $MULTU $t2,$a3 | |
162 sltu $at,$t1,$at | |
163 $ST $t1,0($a0) | |
164 $ADDU $v0,$at | |
165 | |
166 $LD $ta2,3*$BNSZ($a1) | |
167 $LD $ta3,3*$BNSZ($a0) | |
168 $ADDU $t3,$v0 | |
169 sltu $v0,$t3,$v0 | |
170 mflo $at | |
171 mfhi $t2 | |
172 $ADDU $t3,$at | |
173 $ADDU $v0,$t2 | |
174 $MULTU $ta0,$a3 | |
175 sltu $at,$t3,$at | |
176 $ST $t3,$BNSZ($a0) | |
177 $ADDU $v0,$at | |
178 | |
179 subu $a2,4 | |
180 $PTR_ADD $a0,4*$BNSZ | |
181 $PTR_ADD $a1,4*$BNSZ | |
182 $ADDU $ta1,$v0 | |
183 sltu $v0,$ta1,$v0 | |
184 mflo $at | |
185 mfhi $ta0 | |
186 $ADDU $ta1,$at | |
187 $ADDU $v0,$ta0 | |
188 $MULTU $ta2,$a3 | |
189 sltu $at,$ta1,$at | |
190 $ST $ta1,-2*$BNSZ($a0) | |
191 $ADDU $v0,$at | |
192 | |
193 | |
194 and $ta0,$a2,$minus4 | |
195 $ADDU $ta3,$v0 | |
196 sltu $v0,$ta3,$v0 | |
197 mflo $at | |
198 mfhi $ta2 | |
199 $ADDU $ta3,$at | |
200 $ADDU $v0,$ta2 | |
201 sltu $at,$ta3,$at | |
202 $ST $ta3,-$BNSZ($a0) | |
203 $ADDU $v0,$at | |
204 .set noreorder | |
205 bgtzl $ta0,.L_bn_mul_add_words_loop | |
206 $LD $t0,0($a1) | |
207 | |
208 beqz $a2,.L_bn_mul_add_words_return | |
209 nop | |
210 | |
211 .L_bn_mul_add_words_tail: | |
212 .set reorder | |
213 $LD $t0,0($a1) | |
214 $MULTU $t0,$a3 | |
215 $LD $t1,0($a0) | |
216 subu $a2,1 | |
217 $ADDU $t1,$v0 | |
218 sltu $v0,$t1,$v0 | |
219 mflo $at | |
220 mfhi $t0 | |
221 $ADDU $t1,$at | |
222 $ADDU $v0,$t0 | |
223 sltu $at,$t1,$at | |
224 $ST $t1,0($a0) | |
225 $ADDU $v0,$at | |
226 beqz $a2,.L_bn_mul_add_words_return | |
227 | |
228 $LD $t0,$BNSZ($a1) | |
229 $MULTU $t0,$a3 | |
230 $LD $t1,$BNSZ($a0) | |
231 subu $a2,1 | |
232 $ADDU $t1,$v0 | |
233 sltu $v0,$t1,$v0 | |
234 mflo $at | |
235 mfhi $t0 | |
236 $ADDU $t1,$at | |
237 $ADDU $v0,$t0 | |
238 sltu $at,$t1,$at | |
239 $ST $t1,$BNSZ($a0) | |
240 $ADDU $v0,$at | |
241 beqz $a2,.L_bn_mul_add_words_return | |
242 | |
243 $LD $t0,2*$BNSZ($a1) | |
244 $MULTU $t0,$a3 | |
245 $LD $t1,2*$BNSZ($a0) | |
246 $ADDU $t1,$v0 | |
247 sltu $v0,$t1,$v0 | |
248 mflo $at | |
249 mfhi $t0 | |
250 $ADDU $t1,$at | |
251 $ADDU $v0,$t0 | |
252 sltu $at,$t1,$at | |
253 $ST $t1,2*$BNSZ($a0) | |
254 $ADDU $v0,$at | |
255 | |
256 .L_bn_mul_add_words_return: | |
257 .set noreorder | |
258 ___ | |
259 $code.=<<___ if ($flavour =~ /nubi/i); | |
260 $REG_L $t3,4*$SZREG($sp) | |
261 $REG_L $t2,3*$SZREG($sp) | |
262 $REG_L $t1,2*$SZREG($sp) | |
263 $REG_L $t0,1*$SZREG($sp) | |
264 $REG_L $gp,0*$SZREG($sp) | |
265 $PTR_ADD $sp,6*$SZREG | |
266 ___ | |
267 $code.=<<___; | |
268 jr $ra | |
269 move $a0,$v0 | |
270 .end bn_mul_add_words_internal | |
271 | |
272 .align 5 | |
273 .globl bn_mul_words | |
274 .ent bn_mul_words | |
275 bn_mul_words: | |
276 .set noreorder | |
277 bgtz $a2,bn_mul_words_internal | |
278 move $v0,$zero | |
279 jr $ra | |
280 move $a0,$v0 | |
281 .end bn_mul_words | |
282 | |
283 .align 5 | |
284 .ent bn_mul_words_internal | |
285 bn_mul_words_internal: | |
286 ___ | |
287 $code.=<<___ if ($flavour =~ /nubi/i); | |
288 .frame $sp,6*$SZREG,$ra | |
289 .mask 0x8000f008,-$SZREG | |
290 .set noreorder | |
291 $PTR_SUB $sp,6*$SZREG | |
292 $REG_S $ra,5*$SZREG($sp) | |
293 $REG_S $t3,4*$SZREG($sp) | |
294 $REG_S $t2,3*$SZREG($sp) | |
295 $REG_S $t1,2*$SZREG($sp) | |
296 $REG_S $t0,1*$SZREG($sp) | |
297 $REG_S $gp,0*$SZREG($sp) | |
298 ___ | |
299 $code.=<<___; | |
300 .set reorder | |
301 li $minus4,-4 | |
302 and $ta0,$a2,$minus4 | |
303 $LD $t0,0($a1) | |
304 beqz $ta0,.L_bn_mul_words_tail | |
305 | |
306 .L_bn_mul_words_loop: | |
307 $MULTU $t0,$a3 | |
308 $LD $t2,$BNSZ($a1) | |
309 $LD $ta0,2*$BNSZ($a1) | |
310 $LD $ta2,3*$BNSZ($a1) | |
311 mflo $at | |
312 mfhi $t0 | |
313 $ADDU $v0,$at | |
314 sltu $t1,$v0,$at | |
315 $MULTU $t2,$a3 | |
316 $ST $v0,0($a0) | |
317 $ADDU $v0,$t1,$t0 | |
318 | |
319 subu $a2,4 | |
320 $PTR_ADD $a0,4*$BNSZ | |
321 $PTR_ADD $a1,4*$BNSZ | |
322 mflo $at | |
323 mfhi $t2 | |
324 $ADDU $v0,$at | |
325 sltu $t3,$v0,$at | |
326 $MULTU $ta0,$a3 | |
327 $ST $v0,-3*$BNSZ($a0) | |
328 $ADDU $v0,$t3,$t2 | |
329 | |
330 mflo $at | |
331 mfhi $ta0 | |
332 $ADDU $v0,$at | |
333 sltu $ta1,$v0,$at | |
334 $MULTU $ta2,$a3 | |
335 $ST $v0,-2*$BNSZ($a0) | |
336 $ADDU $v0,$ta1,$ta0 | |
337 | |
338 and $ta0,$a2,$minus4 | |
339 mflo $at | |
340 mfhi $ta2 | |
341 $ADDU $v0,$at | |
342 sltu $ta3,$v0,$at | |
343 $ST $v0,-$BNSZ($a0) | |
344 $ADDU $v0,$ta3,$ta2 | |
345 .set noreorder | |
346 bgtzl $ta0,.L_bn_mul_words_loop | |
347 $LD $t0,0($a1) | |
348 | |
349 beqz $a2,.L_bn_mul_words_return | |
350 nop | |
351 | |
352 .L_bn_mul_words_tail: | |
353 .set reorder | |
354 $LD $t0,0($a1) | |
355 $MULTU $t0,$a3 | |
356 subu $a2,1 | |
357 mflo $at | |
358 mfhi $t0 | |
359 $ADDU $v0,$at | |
360 sltu $t1,$v0,$at | |
361 $ST $v0,0($a0) | |
362 $ADDU $v0,$t1,$t0 | |
363 beqz $a2,.L_bn_mul_words_return | |
364 | |
365 $LD $t0,$BNSZ($a1) | |
366 $MULTU $t0,$a3 | |
367 subu $a2,1 | |
368 mflo $at | |
369 mfhi $t0 | |
370 $ADDU $v0,$at | |
371 sltu $t1,$v0,$at | |
372 $ST $v0,$BNSZ($a0) | |
373 $ADDU $v0,$t1,$t0 | |
374 beqz $a2,.L_bn_mul_words_return | |
375 | |
376 $LD $t0,2*$BNSZ($a1) | |
377 $MULTU $t0,$a3 | |
378 mflo $at | |
379 mfhi $t0 | |
380 $ADDU $v0,$at | |
381 sltu $t1,$v0,$at | |
382 $ST $v0,2*$BNSZ($a0) | |
383 $ADDU $v0,$t1,$t0 | |
384 | |
385 .L_bn_mul_words_return: | |
386 .set noreorder | |
387 ___ | |
388 $code.=<<___ if ($flavour =~ /nubi/i); | |
389 $REG_L $t3,4*$SZREG($sp) | |
390 $REG_L $t2,3*$SZREG($sp) | |
391 $REG_L $t1,2*$SZREG($sp) | |
392 $REG_L $t0,1*$SZREG($sp) | |
393 $REG_L $gp,0*$SZREG($sp) | |
394 $PTR_ADD $sp,6*$SZREG | |
395 ___ | |
396 $code.=<<___; | |
397 jr $ra | |
398 move $a0,$v0 | |
399 .end bn_mul_words_internal | |
400 | |
401 .align 5 | |
402 .globl bn_sqr_words | |
403 .ent bn_sqr_words | |
404 bn_sqr_words: | |
405 .set noreorder | |
406 bgtz $a2,bn_sqr_words_internal | |
407 move $v0,$zero | |
408 jr $ra | |
409 move $a0,$v0 | |
410 .end bn_sqr_words | |
411 | |
412 .align 5 | |
413 .ent bn_sqr_words_internal | |
414 bn_sqr_words_internal: | |
415 ___ | |
416 $code.=<<___ if ($flavour =~ /nubi/i); | |
417 .frame $sp,6*$SZREG,$ra | |
418 .mask 0x8000f008,-$SZREG | |
419 .set noreorder | |
420 $PTR_SUB $sp,6*$SZREG | |
421 $REG_S $ra,5*$SZREG($sp) | |
422 $REG_S $t3,4*$SZREG($sp) | |
423 $REG_S $t2,3*$SZREG($sp) | |
424 $REG_S $t1,2*$SZREG($sp) | |
425 $REG_S $t0,1*$SZREG($sp) | |
426 $REG_S $gp,0*$SZREG($sp) | |
427 ___ | |
428 $code.=<<___; | |
429 .set reorder | |
430 li $minus4,-4 | |
431 and $ta0,$a2,$minus4 | |
432 $LD $t0,0($a1) | |
433 beqz $ta0,.L_bn_sqr_words_tail | |
434 | |
435 .L_bn_sqr_words_loop: | |
436 $MULTU $t0,$t0 | |
437 $LD $t2,$BNSZ($a1) | |
438 $LD $ta0,2*$BNSZ($a1) | |
439 $LD $ta2,3*$BNSZ($a1) | |
440 mflo $t1 | |
441 mfhi $t0 | |
442 $ST $t1,0($a0) | |
443 $ST $t0,$BNSZ($a0) | |
444 | |
445 $MULTU $t2,$t2 | |
446 subu $a2,4 | |
447 $PTR_ADD $a0,8*$BNSZ | |
448 $PTR_ADD $a1,4*$BNSZ | |
449 mflo $t3 | |
450 mfhi $t2 | |
451 $ST $t3,-6*$BNSZ($a0) | |
452 $ST $t2,-5*$BNSZ($a0) | |
453 | |
454 $MULTU $ta0,$ta0 | |
455 mflo $ta1 | |
456 mfhi $ta0 | |
457 $ST $ta1,-4*$BNSZ($a0) | |
458 $ST $ta0,-3*$BNSZ($a0) | |
459 | |
460 | |
461 $MULTU $ta2,$ta2 | |
462 and $ta0,$a2,$minus4 | |
463 mflo $ta3 | |
464 mfhi $ta2 | |
465 $ST $ta3,-2*$BNSZ($a0) | |
466 $ST $ta2,-$BNSZ($a0) | |
467 | |
468 .set noreorder | |
469 bgtzl $ta0,.L_bn_sqr_words_loop | |
470 $LD $t0,0($a1) | |
471 | |
472 beqz $a2,.L_bn_sqr_words_return | |
473 nop | |
474 | |
475 .L_bn_sqr_words_tail: | |
476 .set reorder | |
477 $LD $t0,0($a1) | |
478 $MULTU $t0,$t0 | |
479 subu $a2,1 | |
480 mflo $t1 | |
481 mfhi $t0 | |
482 $ST $t1,0($a0) | |
483 $ST $t0,$BNSZ($a0) | |
484 beqz $a2,.L_bn_sqr_words_return | |
485 | |
486 $LD $t0,$BNSZ($a1) | |
487 $MULTU $t0,$t0 | |
488 subu $a2,1 | |
489 mflo $t1 | |
490 mfhi $t0 | |
491 $ST $t1,2*$BNSZ($a0) | |
492 $ST $t0,3*$BNSZ($a0) | |
493 beqz $a2,.L_bn_sqr_words_return | |
494 | |
495 $LD $t0,2*$BNSZ($a1) | |
496 $MULTU $t0,$t0 | |
497 mflo $t1 | |
498 mfhi $t0 | |
499 $ST $t1,4*$BNSZ($a0) | |
500 $ST $t0,5*$BNSZ($a0) | |
501 | |
502 .L_bn_sqr_words_return: | |
503 .set noreorder | |
504 ___ | |
505 $code.=<<___ if ($flavour =~ /nubi/i); | |
506 $REG_L $t3,4*$SZREG($sp) | |
507 $REG_L $t2,3*$SZREG($sp) | |
508 $REG_L $t1,2*$SZREG($sp) | |
509 $REG_L $t0,1*$SZREG($sp) | |
510 $REG_L $gp,0*$SZREG($sp) | |
511 $PTR_ADD $sp,6*$SZREG | |
512 ___ | |
513 $code.=<<___; | |
514 jr $ra | |
515 move $a0,$v0 | |
516 | |
517 .end bn_sqr_words_internal | |
518 | |
519 .align 5 | |
520 .globl bn_add_words | |
521 .ent bn_add_words | |
522 bn_add_words: | |
523 .set noreorder | |
524 bgtz $a3,bn_add_words_internal | |
525 move $v0,$zero | |
526 jr $ra | |
527 move $a0,$v0 | |
528 .end bn_add_words | |
529 | |
530 .align 5 | |
531 .ent bn_add_words_internal | |
532 bn_add_words_internal: | |
533 ___ | |
534 $code.=<<___ if ($flavour =~ /nubi/i); | |
535 .frame $sp,6*$SZREG,$ra | |
536 .mask 0x8000f008,-$SZREG | |
537 .set noreorder | |
538 $PTR_SUB $sp,6*$SZREG | |
539 $REG_S $ra,5*$SZREG($sp) | |
540 $REG_S $t3,4*$SZREG($sp) | |
541 $REG_S $t2,3*$SZREG($sp) | |
542 $REG_S $t1,2*$SZREG($sp) | |
543 $REG_S $t0,1*$SZREG($sp) | |
544 $REG_S $gp,0*$SZREG($sp) | |
545 ___ | |
546 $code.=<<___; | |
547 .set reorder | |
548 li $minus4,-4 | |
549 and $at,$a3,$minus4 | |
550 $LD $t0,0($a1) | |
551 beqz $at,.L_bn_add_words_tail | |
552 | |
553 .L_bn_add_words_loop: | |
554 $LD $ta0,0($a2) | |
555 subu $a3,4 | |
556 $LD $t1,$BNSZ($a1) | |
557 and $at,$a3,$minus4 | |
558 $LD $t2,2*$BNSZ($a1) | |
559 $PTR_ADD $a2,4*$BNSZ | |
560 $LD $t3,3*$BNSZ($a1) | |
561 $PTR_ADD $a0,4*$BNSZ | |
562 $LD $ta1,-3*$BNSZ($a2) | |
563 $PTR_ADD $a1,4*$BNSZ | |
564 $LD $ta2,-2*$BNSZ($a2) | |
565 $LD $ta3,-$BNSZ($a2) | |
566 $ADDU $ta0,$t0 | |
567 sltu $t8,$ta0,$t0 | |
568 $ADDU $t0,$ta0,$v0 | |
569 sltu $v0,$t0,$ta0 | |
570 $ST $t0,-4*$BNSZ($a0) | |
571 $ADDU $v0,$t8 | |
572 | |
573 $ADDU $ta1,$t1 | |
574 sltu $t9,$ta1,$t1 | |
575 $ADDU $t1,$ta1,$v0 | |
576 sltu $v0,$t1,$ta1 | |
577 $ST $t1,-3*$BNSZ($a0) | |
578 $ADDU $v0,$t9 | |
579 | |
580 $ADDU $ta2,$t2 | |
581 sltu $t8,$ta2,$t2 | |
582 $ADDU $t2,$ta2,$v0 | |
583 sltu $v0,$t2,$ta2 | |
584 $ST $t2,-2*$BNSZ($a0) | |
585 $ADDU $v0,$t8 | |
586 | |
587 $ADDU $ta3,$t3 | |
588 sltu $t9,$ta3,$t3 | |
589 $ADDU $t3,$ta3,$v0 | |
590 sltu $v0,$t3,$ta3 | |
591 $ST $t3,-$BNSZ($a0) | |
592 $ADDU $v0,$t9 | |
593 | |
594 .set noreorder | |
595 bgtzl $at,.L_bn_add_words_loop | |
596 $LD $t0,0($a1) | |
597 | |
598 beqz $a3,.L_bn_add_words_return | |
599 nop | |
600 | |
601 .L_bn_add_words_tail: | |
602 .set reorder | |
603 $LD $t0,0($a1) | |
604 $LD $ta0,0($a2) | |
605 $ADDU $ta0,$t0 | |
606 subu $a3,1 | |
607 sltu $t8,$ta0,$t0 | |
608 $ADDU $t0,$ta0,$v0 | |
609 sltu $v0,$t0,$ta0 | |
610 $ST $t0,0($a0) | |
611 $ADDU $v0,$t8 | |
612 beqz $a3,.L_bn_add_words_return | |
613 | |
614 $LD $t1,$BNSZ($a1) | |
615 $LD $ta1,$BNSZ($a2) | |
616 $ADDU $ta1,$t1 | |
617 subu $a3,1 | |
618 sltu $t9,$ta1,$t1 | |
619 $ADDU $t1,$ta1,$v0 | |
620 sltu $v0,$t1,$ta1 | |
621 $ST $t1,$BNSZ($a0) | |
622 $ADDU $v0,$t9 | |
623 beqz $a3,.L_bn_add_words_return | |
624 | |
625 $LD $t2,2*$BNSZ($a1) | |
626 $LD $ta2,2*$BNSZ($a2) | |
627 $ADDU $ta2,$t2 | |
628 sltu $t8,$ta2,$t2 | |
629 $ADDU $t2,$ta2,$v0 | |
630 sltu $v0,$t2,$ta2 | |
631 $ST $t2,2*$BNSZ($a0) | |
632 $ADDU $v0,$t8 | |
633 | |
634 .L_bn_add_words_return: | |
635 .set noreorder | |
636 ___ | |
637 $code.=<<___ if ($flavour =~ /nubi/i); | |
638 $REG_L $t3,4*$SZREG($sp) | |
639 $REG_L $t2,3*$SZREG($sp) | |
640 $REG_L $t1,2*$SZREG($sp) | |
641 $REG_L $t0,1*$SZREG($sp) | |
642 $REG_L $gp,0*$SZREG($sp) | |
643 $PTR_ADD $sp,6*$SZREG | |
644 ___ | |
645 $code.=<<___; | |
646 jr $ra | |
647 move $a0,$v0 | |
648 | |
649 .end bn_add_words_internal | |
650 | |
651 .align 5 | |
652 .globl bn_sub_words | |
653 .ent bn_sub_words | |
654 bn_sub_words: | |
655 .set noreorder | |
656 bgtz $a3,bn_sub_words_internal | |
657 move $v0,$zero | |
658 jr $ra | |
659 move $a0,$zero | |
660 .end bn_sub_words | |
661 | |
662 .align 5 | |
663 .ent bn_sub_words_internal | |
664 bn_sub_words_internal: | |
665 ___ | |
666 $code.=<<___ if ($flavour =~ /nubi/i); | |
667 .frame $sp,6*$SZREG,$ra | |
668 .mask 0x8000f008,-$SZREG | |
669 .set noreorder | |
670 $PTR_SUB $sp,6*$SZREG | |
671 $REG_S $ra,5*$SZREG($sp) | |
672 $REG_S $t3,4*$SZREG($sp) | |
673 $REG_S $t2,3*$SZREG($sp) | |
674 $REG_S $t1,2*$SZREG($sp) | |
675 $REG_S $t0,1*$SZREG($sp) | |
676 $REG_S $gp,0*$SZREG($sp) | |
677 ___ | |
678 $code.=<<___; | |
679 .set reorder | |
680 li $minus4,-4 | |
681 and $at,$a3,$minus4 | |
682 $LD $t0,0($a1) | |
683 beqz $at,.L_bn_sub_words_tail | |
684 | |
685 .L_bn_sub_words_loop: | |
686 $LD $ta0,0($a2) | |
687 subu $a3,4 | |
688 $LD $t1,$BNSZ($a1) | |
689 and $at,$a3,$minus4 | |
690 $LD $t2,2*$BNSZ($a1) | |
691 $PTR_ADD $a2,4*$BNSZ | |
692 $LD $t3,3*$BNSZ($a1) | |
693 $PTR_ADD $a0,4*$BNSZ | |
694 $LD $ta1,-3*$BNSZ($a2) | |
695 $PTR_ADD $a1,4*$BNSZ | |
696 $LD $ta2,-2*$BNSZ($a2) | |
697 $LD $ta3,-$BNSZ($a2) | |
698 sltu $t8,$t0,$ta0 | |
699 $SUBU $ta0,$t0,$ta0 | |
700 $SUBU $t0,$ta0,$v0 | |
701 sgtu $v0,$t0,$ta0 | |
702 $ST $t0,-4*$BNSZ($a0) | |
703 $ADDU $v0,$t8 | |
704 | |
705 sltu $t9,$t1,$ta1 | |
706 $SUBU $ta1,$t1,$ta1 | |
707 $SUBU $t1,$ta1,$v0 | |
708 sgtu $v0,$t1,$ta1 | |
709 $ST $t1,-3*$BNSZ($a0) | |
710 $ADDU $v0,$t9 | |
711 | |
712 | |
713 sltu $t8,$t2,$ta2 | |
714 $SUBU $ta2,$t2,$ta2 | |
715 $SUBU $t2,$ta2,$v0 | |
716 sgtu $v0,$t2,$ta2 | |
717 $ST $t2,-2*$BNSZ($a0) | |
718 $ADDU $v0,$t8 | |
719 | |
720 sltu $t9,$t3,$ta3 | |
721 $SUBU $ta3,$t3,$ta3 | |
722 $SUBU $t3,$ta3,$v0 | |
723 sgtu $v0,$t3,$ta3 | |
724 $ST $t3,-$BNSZ($a0) | |
725 $ADDU $v0,$t9 | |
726 | |
727 .set noreorder | |
728 bgtzl $at,.L_bn_sub_words_loop | |
729 $LD $t0,0($a1) | |
730 | |
731 beqz $a3,.L_bn_sub_words_return | |
732 nop | |
733 | |
734 .L_bn_sub_words_tail: | |
735 .set reorder | |
736 $LD $t0,0($a1) | |
737 $LD $ta0,0($a2) | |
738 subu $a3,1 | |
739 sltu $t8,$t0,$ta0 | |
740 $SUBU $ta0,$t0,$ta0 | |
741 $SUBU $t0,$ta0,$v0 | |
742 sgtu $v0,$t0,$ta0 | |
743 $ST $t0,0($a0) | |
744 $ADDU $v0,$t8 | |
745 beqz $a3,.L_bn_sub_words_return | |
746 | |
747 $LD $t1,$BNSZ($a1) | |
748 subu $a3,1 | |
749 $LD $ta1,$BNSZ($a2) | |
750 sltu $t9,$t1,$ta1 | |
751 $SUBU $ta1,$t1,$ta1 | |
752 $SUBU $t1,$ta1,$v0 | |
753 sgtu $v0,$t1,$ta1 | |
754 $ST $t1,$BNSZ($a0) | |
755 $ADDU $v0,$t9 | |
756 beqz $a3,.L_bn_sub_words_return | |
757 | |
758 $LD $t2,2*$BNSZ($a1) | |
759 $LD $ta2,2*$BNSZ($a2) | |
760 sltu $t8,$t2,$ta2 | |
761 $SUBU $ta2,$t2,$ta2 | |
762 $SUBU $t2,$ta2,$v0 | |
763 sgtu $v0,$t2,$ta2 | |
764 $ST $t2,2*$BNSZ($a0) | |
765 $ADDU $v0,$t8 | |
766 | |
767 .L_bn_sub_words_return: | |
768 .set noreorder | |
769 ___ | |
770 $code.=<<___ if ($flavour =~ /nubi/i); | |
771 $REG_L $t3,4*$SZREG($sp) | |
772 $REG_L $t2,3*$SZREG($sp) | |
773 $REG_L $t1,2*$SZREG($sp) | |
774 $REG_L $t0,1*$SZREG($sp) | |
775 $REG_L $gp,0*$SZREG($sp) | |
776 $PTR_ADD $sp,6*$SZREG | |
777 ___ | |
778 $code.=<<___; | |
779 jr $ra | |
780 move $a0,$v0 | |
781 .end bn_sub_words_internal | |
782 | |
783 .align 5 | |
784 .globl bn_div_3_words | |
785 .ent bn_div_3_words | |
786 bn_div_3_words: | |
787 .set noreorder | |
788 move $a3,$a0 # we know that bn_div_words does not | |
789 # touch $a3, $ta2, $ta3 and preserves $a2 | |
790 # so that we can save two arguments | |
791 # and return address in registers | |
792 # instead of stack:-) | |
793 | |
794 $LD $a0,($a3) | |
795 move $ta2,$a1 | |
796 bne $a0,$a2,bn_div_3_words_internal | |
797 $LD $a1,-$BNSZ($a3) | |
798 li $v0,-1 | |
799 jr $ra | |
800 move $a0,$v0 | |
801 .end bn_div_3_words | |
802 | |
803 .align 5 | |
804 .ent bn_div_3_words_internal | |
805 bn_div_3_words_internal: | |
806 ___ | |
807 $code.=<<___ if ($flavour =~ /nubi/i); | |
808 .frame $sp,6*$SZREG,$ra | |
809 .mask 0x8000f008,-$SZREG | |
810 .set noreorder | |
811 $PTR_SUB $sp,6*$SZREG | |
812 $REG_S $ra,5*$SZREG($sp) | |
813 $REG_S $t3,4*$SZREG($sp) | |
814 $REG_S $t2,3*$SZREG($sp) | |
815 $REG_S $t1,2*$SZREG($sp) | |
816 $REG_S $t0,1*$SZREG($sp) | |
817 $REG_S $gp,0*$SZREG($sp) | |
818 ___ | |
819 $code.=<<___; | |
820 .set reorder | |
821 move $ta3,$ra | |
822 bal bn_div_words_internal | |
823 move $ra,$ta3 | |
824 $MULTU $ta2,$v0 | |
825 $LD $t2,-2*$BNSZ($a3) | |
826 move $ta0,$zero | |
827 mfhi $t1 | |
828 mflo $t0 | |
829 sltu $t8,$t1,$a1 | |
830 .L_bn_div_3_words_inner_loop: | |
831 bnez $t8,.L_bn_div_3_words_inner_loop_done | |
832 sgeu $at,$t2,$t0 | |
833 seq $t9,$t1,$a1 | |
834 and $at,$t9 | |
835 sltu $t3,$t0,$ta2 | |
836 $ADDU $a1,$a2 | |
837 $SUBU $t1,$t3 | |
838 $SUBU $t0,$ta2 | |
839 sltu $t8,$t1,$a1 | |
840 sltu $ta0,$a1,$a2 | |
841 or $t8,$ta0 | |
842 .set noreorder | |
843 beqzl $at,.L_bn_div_3_words_inner_loop | |
844 $SUBU $v0,1 | |
845 .set reorder | |
846 .L_bn_div_3_words_inner_loop_done: | |
847 .set noreorder | |
848 ___ | |
849 $code.=<<___ if ($flavour =~ /nubi/i); | |
850 $REG_L $t3,4*$SZREG($sp) | |
851 $REG_L $t2,3*$SZREG($sp) | |
852 $REG_L $t1,2*$SZREG($sp) | |
853 $REG_L $t0,1*$SZREG($sp) | |
854 $REG_L $gp,0*$SZREG($sp) | |
855 $PTR_ADD $sp,6*$SZREG | |
856 ___ | |
857 $code.=<<___; | |
858 jr $ra | |
859 move $a0,$v0 | |
860 .end bn_div_3_words_internal | |
861 | |
862 .align 5 | |
863 .globl bn_div_words | |
864 .ent bn_div_words | |
865 bn_div_words: | |
866 .set noreorder | |
867 bnez $a2,bn_div_words_internal | |
868 li $v0,-1 # I would rather signal div-by-zero | |
869 # which can be done with 'break 7' | |
870 jr $ra | |
871 move $a0,$v0 | |
872 .end bn_div_words | |
873 | |
874 .align 5 | |
875 .ent bn_div_words_internal | |
876 bn_div_words_internal: | |
877 ___ | |
878 $code.=<<___ if ($flavour =~ /nubi/i); | |
879 .frame $sp,6*$SZREG,$ra | |
880 .mask 0x8000f008,-$SZREG | |
881 .set noreorder | |
882 $PTR_SUB $sp,6*$SZREG | |
883 $REG_S $ra,5*$SZREG($sp) | |
884 $REG_S $t3,4*$SZREG($sp) | |
885 $REG_S $t2,3*$SZREG($sp) | |
886 $REG_S $t1,2*$SZREG($sp) | |
887 $REG_S $t0,1*$SZREG($sp) | |
888 $REG_S $gp,0*$SZREG($sp) | |
889 ___ | |
890 $code.=<<___; | |
891 move $v1,$zero | |
892 bltz $a2,.L_bn_div_words_body | |
893 move $t9,$v1 | |
894 $SLL $a2,1 | |
895 bgtz $a2,.-4 | |
896 addu $t9,1 | |
897 | |
898 .set reorder | |
899 negu $t1,$t9 | |
900 li $t2,-1 | |
901 $SLL $t2,$t1 | |
902 and $t2,$a0 | |
903 $SRL $at,$a1,$t1 | |
904 .set noreorder | |
905 bnezl $t2,.+8 | |
906 break 6 # signal overflow | |
907 .set reorder | |
908 $SLL $a0,$t9 | |
909 $SLL $a1,$t9 | |
910 or $a0,$at | |
911 ___ | |
912 $QT=$ta0; | |
913 $HH=$ta1; | |
914 $DH=$v1; | |
915 $code.=<<___; | |
916 .L_bn_div_words_body: | |
917 $SRL $DH,$a2,4*$BNSZ # bits | |
918 sgeu $at,$a0,$a2 | |
919 .set noreorder | |
920 bnezl $at,.+8 | |
921 $SUBU $a0,$a2 | |
922 .set reorder | |
923 | |
924 li $QT,-1 | |
925 $SRL $HH,$a0,4*$BNSZ # bits | |
926 $SRL $QT,4*$BNSZ # q=0xffffffff | |
927 beq $DH,$HH,.L_bn_div_words_skip_div1 | |
928 $DIVU $zero,$a0,$DH | |
929 mflo $QT | |
930 .L_bn_div_words_skip_div1: | |
931 $MULTU $a2,$QT | |
932 $SLL $t3,$a0,4*$BNSZ # bits | |
933 $SRL $at,$a1,4*$BNSZ # bits | |
934 or $t3,$at | |
935 mflo $t0 | |
936 mfhi $t1 | |
937 .L_bn_div_words_inner_loop1: | |
938 sltu $t2,$t3,$t0 | |
939 seq $t8,$HH,$t1 | |
940 sltu $at,$HH,$t1 | |
941 and $t2,$t8 | |
942 sltu $v0,$t0,$a2 | |
943 or $at,$t2 | |
944 .set noreorder | |
945 beqz $at,.L_bn_div_words_inner_loop1_done | |
946 $SUBU $t1,$v0 | |
947 $SUBU $t0,$a2 | |
948 b .L_bn_div_words_inner_loop1 | |
949 $SUBU $QT,1 | |
950 .set reorder | |
951 .L_bn_div_words_inner_loop1_done: | |
952 | |
953 $SLL $a1,4*$BNSZ # bits | |
954 $SUBU $a0,$t3,$t0 | |
955 $SLL $v0,$QT,4*$BNSZ # bits | |
956 | |
957 li $QT,-1 | |
958 $SRL $HH,$a0,4*$BNSZ # bits | |
959 $SRL $QT,4*$BNSZ # q=0xffffffff | |
960 beq $DH,$HH,.L_bn_div_words_skip_div2 | |
961 $DIVU $zero,$a0,$DH | |
962 mflo $QT | |
963 .L_bn_div_words_skip_div2: | |
964 $MULTU $a2,$QT | |
965 $SLL $t3,$a0,4*$BNSZ # bits | |
966 $SRL $at,$a1,4*$BNSZ # bits | |
967 or $t3,$at | |
968 mflo $t0 | |
969 mfhi $t1 | |
970 .L_bn_div_words_inner_loop2: | |
971 sltu $t2,$t3,$t0 | |
972 seq $t8,$HH,$t1 | |
973 sltu $at,$HH,$t1 | |
974 and $t2,$t8 | |
975 sltu $v1,$t0,$a2 | |
976 or $at,$t2 | |
977 .set noreorder | |
978 beqz $at,.L_bn_div_words_inner_loop2_done | |
979 $SUBU $t1,$v1 | |
980 $SUBU $t0,$a2 | |
981 b .L_bn_div_words_inner_loop2 | |
982 $SUBU $QT,1 | |
983 .set reorder | |
984 .L_bn_div_words_inner_loop2_done: | |
985 | |
986 $SUBU $a0,$t3,$t0 | |
987 or $v0,$QT | |
988 $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it | |
989 $SRL $a2,$t9 # restore $a2 | |
990 | |
991 .set noreorder | |
992 move $a1,$v1 | |
993 ___ | |
994 $code.=<<___ if ($flavour =~ /nubi/i); | |
995 $REG_L $t3,4*$SZREG($sp) | |
996 $REG_L $t2,3*$SZREG($sp) | |
997 $REG_L $t1,2*$SZREG($sp) | |
998 $REG_L $t0,1*$SZREG($sp) | |
999 $REG_L $gp,0*$SZREG($sp) | |
1000 $PTR_ADD $sp,6*$SZREG | |
1001 ___ | |
1002 $code.=<<___; | |
1003 jr $ra | |
1004 move $a0,$v0 | |
1005 .end bn_div_words_internal | |
1006 ___ | |
1007 undef $HH; undef $QT; undef $DH; | |
1008 | |
1009 ($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3); | |
1010 ($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3); | |
1011 | |
1012 ($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1 | |
1013 ($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2 | |
1014 | |
1015 ($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3); | |
1016 | |
1017 $code.=<<___; | |
1018 | |
1019 .align 5 | |
1020 .globl bn_mul_comba8 | |
1021 .ent bn_mul_comba8 | |
1022 bn_mul_comba8: | |
1023 .set noreorder | |
1024 ___ | |
1025 $code.=<<___ if ($flavour =~ /nubi/i); | |
1026 .frame $sp,12*$SZREG,$ra | |
1027 .mask 0x803ff008,-$SZREG | |
1028 $PTR_SUB $sp,12*$SZREG | |
1029 $REG_S $ra,11*$SZREG($sp) | |
1030 $REG_S $s5,10*$SZREG($sp) | |
1031 $REG_S $s4,9*$SZREG($sp) | |
1032 $REG_S $s3,8*$SZREG($sp) | |
1033 $REG_S $s2,7*$SZREG($sp) | |
1034 $REG_S $s1,6*$SZREG($sp) | |
1035 $REG_S $s0,5*$SZREG($sp) | |
1036 $REG_S $t3,4*$SZREG($sp) | |
1037 $REG_S $t2,3*$SZREG($sp) | |
1038 $REG_S $t1,2*$SZREG($sp) | |
1039 $REG_S $t0,1*$SZREG($sp) | |
1040 $REG_S $gp,0*$SZREG($sp) | |
1041 ___ | |
1042 $code.=<<___ if ($flavour !~ /nubi/i); | |
1043 .frame $sp,6*$SZREG,$ra | |
1044 .mask 0x003f0000,-$SZREG | |
1045 $PTR_SUB $sp,6*$SZREG | |
1046 $REG_S $s5,5*$SZREG($sp) | |
1047 $REG_S $s4,4*$SZREG($sp) | |
1048 $REG_S $s3,3*$SZREG($sp) | |
1049 $REG_S $s2,2*$SZREG($sp) | |
1050 $REG_S $s1,1*$SZREG($sp) | |
1051 $REG_S $s0,0*$SZREG($sp) | |
1052 ___ | |
1053 $code.=<<___; | |
1054 | |
1055 .set reorder | |
1056 $LD $a_0,0($a1) # If compiled with -mips3 option on | |
1057 # R5000 box assembler barks on this | |
1058 # line with "should not have mult/div | |
1059 # as last instruction in bb (R10K | |
1060 # bug)" warning. If anybody out there | |
1061 # has a clue about how to circumvent | |
1062 # this do send me a note. | |
1063 # <appro\@fy.chalmers.se> | |
1064 | |
1065 $LD $b_0,0($a2) | |
1066 $LD $a_1,$BNSZ($a1) | |
1067 $LD $a_2,2*$BNSZ($a1) | |
1068 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); | |
1069 $LD $a_3,3*$BNSZ($a1) | |
1070 $LD $b_1,$BNSZ($a2) | |
1071 $LD $b_2,2*$BNSZ($a2) | |
1072 $LD $b_3,3*$BNSZ($a2) | |
1073 mflo $c_1 | |
1074 mfhi $c_2 | |
1075 | |
1076 $LD $a_4,4*$BNSZ($a1) | |
1077 $LD $a_5,5*$BNSZ($a1) | |
1078 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); | |
1079 $LD $a_6,6*$BNSZ($a1) | |
1080 $LD $a_7,7*$BNSZ($a1) | |
1081 $LD $b_4,4*$BNSZ($a2) | |
1082 $LD $b_5,5*$BNSZ($a2) | |
1083 mflo $t_1 | |
1084 mfhi $t_2 | |
1085 $ADDU $c_2,$t_1 | |
1086 sltu $at,$c_2,$t_1 | |
1087 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); | |
1088 $ADDU $c_3,$t_2,$at | |
1089 $LD $b_6,6*$BNSZ($a2) | |
1090 $LD $b_7,7*$BNSZ($a2) | |
1091 $ST $c_1,0($a0) # r[0]=c1; | |
1092 mflo $t_1 | |
1093 mfhi $t_2 | |
1094 $ADDU $c_2,$t_1 | |
1095 sltu $at,$c_2,$t_1 | |
1096 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); | |
1097 $ADDU $t_2,$at | |
1098 $ADDU $c_3,$t_2 | |
1099 sltu $c_1,$c_3,$t_2 | |
1100 $ST $c_2,$BNSZ($a0) # r[1]=c2; | |
1101 | |
1102 mflo $t_1 | |
1103 mfhi $t_2 | |
1104 $ADDU $c_3,$t_1 | |
1105 sltu $at,$c_3,$t_1 | |
1106 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); | |
1107 $ADDU $t_2,$at | |
1108 $ADDU $c_1,$t_2 | |
1109 mflo $t_1 | |
1110 mfhi $t_2 | |
1111 $ADDU $c_3,$t_1 | |
1112 sltu $at,$c_3,$t_1 | |
1113 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); | |
1114 $ADDU $t_2,$at | |
1115 $ADDU $c_1,$t_2 | |
1116 sltu $c_2,$c_1,$t_2 | |
1117 mflo $t_1 | |
1118 mfhi $t_2 | |
1119 $ADDU $c_3,$t_1 | |
1120 sltu $at,$c_3,$t_1 | |
1121 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); | |
1122 $ADDU $t_2,$at | |
1123 $ADDU $c_1,$t_2 | |
1124 sltu $at,$c_1,$t_2 | |
1125 $ADDU $c_2,$at | |
1126 $ST $c_3,2*$BNSZ($a0) # r[2]=c3; | |
1127 | |
1128 mflo $t_1 | |
1129 mfhi $t_2 | |
1130 $ADDU $c_1,$t_1 | |
1131 sltu $at,$c_1,$t_1 | |
1132 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); | |
1133 $ADDU $t_2,$at | |
1134 $ADDU $c_2,$t_2 | |
1135 sltu $c_3,$c_2,$t_2 | |
1136 mflo $t_1 | |
1137 mfhi $t_2 | |
1138 $ADDU $c_1,$t_1 | |
1139 sltu $at,$c_1,$t_1 | |
1140 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); | |
1141 $ADDU $t_2,$at | |
1142 $ADDU $c_2,$t_2 | |
1143 sltu $at,$c_2,$t_2 | |
1144 $ADDU $c_3,$at | |
1145 mflo $t_1 | |
1146 mfhi $t_2 | |
1147 $ADDU $c_1,$t_1 | |
1148 sltu $at,$c_1,$t_1 | |
1149 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); | |
1150 $ADDU $t_2,$at | |
1151 $ADDU $c_2,$t_2 | |
1152 sltu $at,$c_2,$t_2 | |
1153 $ADDU $c_3,$at | |
1154 mflo $t_1 | |
1155 mfhi $t_2 | |
1156 $ADDU $c_1,$t_1 | |
1157 sltu $at,$c_1,$t_1 | |
1158 $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); | |
1159 $ADDU $t_2,$at | |
1160 $ADDU $c_2,$t_2 | |
1161 sltu $at,$c_2,$t_2 | |
1162 $ADDU $c_3,$at | |
1163 $ST $c_1,3*$BNSZ($a0) # r[3]=c1; | |
1164 | |
1165 mflo $t_1 | |
1166 mfhi $t_2 | |
1167 $ADDU $c_2,$t_1 | |
1168 sltu $at,$c_2,$t_1 | |
1169 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); | |
1170 $ADDU $t_2,$at | |
1171 $ADDU $c_3,$t_2 | |
1172 sltu $c_1,$c_3,$t_2 | |
1173 mflo $t_1 | |
1174 mfhi $t_2 | |
1175 $ADDU $c_2,$t_1 | |
1176 sltu $at,$c_2,$t_1 | |
1177 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); | |
1178 $ADDU $t_2,$at | |
1179 $ADDU $c_3,$t_2 | |
1180 sltu $at,$c_3,$t_2 | |
1181 $ADDU $c_1,$at | |
1182 mflo $t_1 | |
1183 mfhi $t_2 | |
1184 $ADDU $c_2,$t_1 | |
1185 sltu $at,$c_2,$t_1 | |
1186 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); | |
1187 $ADDU $t_2,$at | |
1188 $ADDU $c_3,$t_2 | |
1189 sltu $at,$c_3,$t_2 | |
1190 $ADDU $c_1,$at | |
1191 mflo $t_1 | |
1192 mfhi $t_2 | |
1193 $ADDU $c_2,$t_1 | |
1194 sltu $at,$c_2,$t_1 | |
1195 $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); | |
1196 $ADDU $t_2,$at | |
1197 $ADDU $c_3,$t_2 | |
1198 sltu $at,$c_3,$t_2 | |
1199 $ADDU $c_1,$at | |
1200 mflo $t_1 | |
1201 mfhi $t_2 | |
1202 $ADDU $c_2,$t_1 | |
1203 sltu $at,$c_2,$t_1 | |
1204 $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); | |
1205 $ADDU $t_2,$at | |
1206 $ADDU $c_3,$t_2 | |
1207 sltu $at,$c_3,$t_2 | |
1208 $ADDU $c_1,$at | |
1209 $ST $c_2,4*$BNSZ($a0) # r[4]=c2; | |
1210 | |
1211 mflo $t_1 | |
1212 mfhi $t_2 | |
1213 $ADDU $c_3,$t_1 | |
1214 sltu $at,$c_3,$t_1 | |
1215 $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); | |
1216 $ADDU $t_2,$at | |
1217 $ADDU $c_1,$t_2 | |
1218 sltu $c_2,$c_1,$t_2 | |
1219 mflo $t_1 | |
1220 mfhi $t_2 | |
1221 $ADDU $c_3,$t_1 | |
1222 sltu $at,$c_3,$t_1 | |
1223 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); | |
1224 $ADDU $t_2,$at | |
1225 $ADDU $c_1,$t_2 | |
1226 sltu $at,$c_1,$t_2 | |
1227 $ADDU $c_2,$at | |
1228 mflo $t_1 | |
1229 mfhi $t_2 | |
1230 $ADDU $c_3,$t_1 | |
1231 sltu $at,$c_3,$t_1 | |
1232 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); | |
1233 $ADDU $t_2,$at | |
1234 $ADDU $c_1,$t_2 | |
1235 sltu $at,$c_1,$t_2 | |
1236 $ADDU $c_2,$at | |
1237 mflo $t_1 | |
1238 mfhi $t_2 | |
1239 $ADDU $c_3,$t_1 | |
1240 sltu $at,$c_3,$t_1 | |
1241 $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); | |
1242 $ADDU $t_2,$at | |
1243 $ADDU $c_1,$t_2 | |
1244 sltu $at,$c_1,$t_2 | |
1245 $ADDU $c_2,$at | |
1246 mflo $t_1 | |
1247 mfhi $t_2 | |
1248 $ADDU $c_3,$t_1 | |
1249 sltu $at,$c_3,$t_1 | |
1250 $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); | |
1251 $ADDU $t_2,$at | |
1252 $ADDU $c_1,$t_2 | |
1253 sltu $at,$c_1,$t_2 | |
1254 $ADDU $c_2,$at | |
1255 mflo $t_1 | |
1256 mfhi $t_2 | |
1257 $ADDU $c_3,$t_1 | |
1258 sltu $at,$c_3,$t_1 | |
1259 $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); | |
1260 $ADDU $t_2,$at | |
1261 $ADDU $c_1,$t_2 | |
1262 sltu $at,$c_1,$t_2 | |
1263 $ADDU $c_2,$at | |
1264 $ST $c_3,5*$BNSZ($a0) # r[5]=c3; | |
1265 | |
1266 mflo $t_1 | |
1267 mfhi $t_2 | |
1268 $ADDU $c_1,$t_1 | |
1269 sltu $at,$c_1,$t_1 | |
1270 $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); | |
1271 $ADDU $t_2,$at | |
1272 $ADDU $c_2,$t_2 | |
1273 sltu $c_3,$c_2,$t_2 | |
1274 mflo $t_1 | |
1275 mfhi $t_2 | |
1276 $ADDU $c_1,$t_1 | |
1277 sltu $at,$c_1,$t_1 | |
1278 $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); | |
1279 $ADDU $t_2,$at | |
1280 $ADDU $c_2,$t_2 | |
1281 sltu $at,$c_2,$t_2 | |
1282 $ADDU $c_3,$at | |
1283 mflo $t_1 | |
1284 mfhi $t_2 | |
1285 $ADDU $c_1,$t_1 | |
1286 sltu $at,$c_1,$t_1 | |
1287 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); | |
1288 $ADDU $t_2,$at | |
1289 $ADDU $c_2,$t_2 | |
1290 sltu $at,$c_2,$t_2 | |
1291 $ADDU $c_3,$at | |
1292 mflo $t_1 | |
1293 mfhi $t_2 | |
1294 $ADDU $c_1,$t_1 | |
1295 sltu $at,$c_1,$t_1 | |
1296 $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); | |
1297 $ADDU $t_2,$at | |
1298 $ADDU $c_2,$t_2 | |
1299 sltu $at,$c_2,$t_2 | |
1300 $ADDU $c_3,$at | |
1301 mflo $t_1 | |
1302 mfhi $t_2 | |
1303 $ADDU $c_1,$t_1 | |
1304 sltu $at,$c_1,$t_1 | |
1305 $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); | |
1306 $ADDU $t_2,$at | |
1307 $ADDU $c_2,$t_2 | |
1308 sltu $at,$c_2,$t_2 | |
1309 $ADDU $c_3,$at | |
1310 mflo $t_1 | |
1311 mfhi $t_2 | |
1312 $ADDU $c_1,$t_1 | |
1313 sltu $at,$c_1,$t_1 | |
1314 $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); | |
1315 $ADDU $t_2,$at | |
1316 $ADDU $c_2,$t_2 | |
1317 sltu $at,$c_2,$t_2 | |
1318 $ADDU $c_3,$at | |
1319 mflo $t_1 | |
1320 mfhi $t_2 | |
1321 $ADDU $c_1,$t_1 | |
1322 sltu $at,$c_1,$t_1 | |
1323 $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); | |
1324 $ADDU $t_2,$at | |
1325 $ADDU $c_2,$t_2 | |
1326 sltu $at,$c_2,$t_2 | |
1327 $ADDU $c_3,$at | |
1328 $ST $c_1,6*$BNSZ($a0) # r[6]=c1; | |
1329 | |
1330 mflo $t_1 | |
1331 mfhi $t_2 | |
1332 $ADDU $c_2,$t_1 | |
1333 sltu $at,$c_2,$t_1 | |
1334 $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); | |
1335 $ADDU $t_2,$at | |
1336 $ADDU $c_3,$t_2 | |
1337 sltu $c_1,$c_3,$t_2 | |
1338 mflo $t_1 | |
1339 mfhi $t_2 | |
1340 $ADDU $c_2,$t_1 | |
1341 sltu $at,$c_2,$t_1 | |
1342 $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); | |
1343 $ADDU $t_2,$at | |
1344 $ADDU $c_3,$t_2 | |
1345 sltu $at,$c_3,$t_2 | |
1346 $ADDU $c_1,$at | |
1347 mflo $t_1 | |
1348 mfhi $t_2 | |
1349 $ADDU $c_2,$t_1 | |
1350 sltu $at,$c_2,$t_1 | |
1351 $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); | |
1352 $ADDU $t_2,$at | |
1353 $ADDU $c_3,$t_2 | |
1354 sltu $at,$c_3,$t_2 | |
1355 $ADDU $c_1,$at | |
1356 mflo $t_1 | |
1357 mfhi $t_2 | |
1358 $ADDU $c_2,$t_1 | |
1359 sltu $at,$c_2,$t_1 | |
1360 $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); | |
1361 $ADDU $t_2,$at | |
1362 $ADDU $c_3,$t_2 | |
1363 sltu $at,$c_3,$t_2 | |
1364 $ADDU $c_1,$at | |
1365 mflo $t_1 | |
1366 mfhi $t_2 | |
1367 $ADDU $c_2,$t_1 | |
1368 sltu $at,$c_2,$t_1 | |
1369 $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); | |
1370 $ADDU $t_2,$at | |
1371 $ADDU $c_3,$t_2 | |
1372 sltu $at,$c_3,$t_2 | |
1373 $ADDU $c_1,$at | |
1374 mflo $t_1 | |
1375 mfhi $t_2 | |
1376 $ADDU $c_2,$t_1 | |
1377 sltu $at,$c_2,$t_1 | |
1378 $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); | |
1379 $ADDU $t_2,$at | |
1380 $ADDU $c_3,$t_2 | |
1381 sltu $at,$c_3,$t_2 | |
1382 $ADDU $c_1,$at | |
1383 mflo $t_1 | |
1384 mfhi $t_2 | |
1385 $ADDU $c_2,$t_1 | |
1386 sltu $at,$c_2,$t_1 | |
1387 $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); | |
1388 $ADDU $t_2,$at | |
1389 $ADDU $c_3,$t_2 | |
1390 sltu $at,$c_3,$t_2 | |
1391 $ADDU $c_1,$at | |
1392 mflo $t_1 | |
1393 mfhi $t_2 | |
1394 $ADDU $c_2,$t_1 | |
1395 sltu $at,$c_2,$t_1 | |
1396 $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); | |
1397 $ADDU $t_2,$at | |
1398 $ADDU $c_3,$t_2 | |
1399 sltu $at,$c_3,$t_2 | |
1400 $ADDU $c_1,$at | |
1401 $ST $c_2,7*$BNSZ($a0) # r[7]=c2; | |
1402 | |
1403 mflo $t_1 | |
1404 mfhi $t_2 | |
1405 $ADDU $c_3,$t_1 | |
1406 sltu $at,$c_3,$t_1 | |
1407 $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); | |
1408 $ADDU $t_2,$at | |
1409 $ADDU $c_1,$t_2 | |
1410 sltu $c_2,$c_1,$t_2 | |
1411 mflo $t_1 | |
1412 mfhi $t_2 | |
1413 $ADDU $c_3,$t_1 | |
1414 sltu $at,$c_3,$t_1 | |
1415 $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); | |
1416 $ADDU $t_2,$at | |
1417 $ADDU $c_1,$t_2 | |
1418 sltu $at,$c_1,$t_2 | |
1419 $ADDU $c_2,$at | |
1420 mflo $t_1 | |
1421 mfhi $t_2 | |
1422 $ADDU $c_3,$t_1 | |
1423 sltu $at,$c_3,$t_1 | |
1424 $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); | |
1425 $ADDU $t_2,$at | |
1426 $ADDU $c_1,$t_2 | |
1427 sltu $at,$c_1,$t_2 | |
1428 $ADDU $c_2,$at | |
1429 mflo $t_1 | |
1430 mfhi $t_2 | |
1431 $ADDU $c_3,$t_1 | |
1432 sltu $at,$c_3,$t_1 | |
1433 $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); | |
1434 $ADDU $t_2,$at | |
1435 $ADDU $c_1,$t_2 | |
1436 sltu $at,$c_1,$t_2 | |
1437 $ADDU $c_2,$at | |
1438 mflo $t_1 | |
1439 mfhi $t_2 | |
1440 $ADDU $c_3,$t_1 | |
1441 sltu $at,$c_3,$t_1 | |
1442 $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); | |
1443 $ADDU $t_2,$at | |
1444 $ADDU $c_1,$t_2 | |
1445 sltu $at,$c_1,$t_2 | |
1446 $ADDU $c_2,$at | |
1447 mflo $t_1 | |
1448 mfhi $t_2 | |
1449 $ADDU $c_3,$t_1 | |
1450 sltu $at,$c_3,$t_1 | |
1451 $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); | |
1452 $ADDU $t_2,$at | |
1453 $ADDU $c_1,$t_2 | |
1454 sltu $at,$c_1,$t_2 | |
1455 $ADDU $c_2,$at | |
1456 mflo $t_1 | |
1457 mfhi $t_2 | |
1458 $ADDU $c_3,$t_1 | |
1459 sltu $at,$c_3,$t_1 | |
1460 $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); | |
1461 $ADDU $t_2,$at | |
1462 $ADDU $c_1,$t_2 | |
1463 sltu $at,$c_1,$t_2 | |
1464 $ADDU $c_2,$at | |
1465 $ST $c_3,8*$BNSZ($a0) # r[8]=c3; | |
1466 | |
1467 mflo $t_1 | |
1468 mfhi $t_2 | |
1469 $ADDU $c_1,$t_1 | |
1470 sltu $at,$c_1,$t_1 | |
1471 $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); | |
1472 $ADDU $t_2,$at | |
1473 $ADDU $c_2,$t_2 | |
1474 sltu $c_3,$c_2,$t_2 | |
1475 mflo $t_1 | |
1476 mfhi $t_2 | |
1477 $ADDU $c_1,$t_1 | |
1478 sltu $at,$c_1,$t_1 | |
1479 $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); | |
1480 $ADDU $t_2,$at | |
1481 $ADDU $c_2,$t_2 | |
1482 sltu $at,$c_2,$t_2 | |
1483 $ADDU $c_3,$at | |
1484 mflo $t_1 | |
1485 mfhi $t_2 | |
1486 $ADDU $c_1,$t_1 | |
1487 sltu $at,$c_1,$t_1 | |
1488 $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); | |
1489 $ADDU $t_2,$at | |
1490 $ADDU $c_2,$t_2 | |
1491 sltu $at,$c_2,$t_2 | |
1492 $ADDU $c_3,$at | |
1493 mflo $t_1 | |
1494 mfhi $t_2 | |
1495 $ADDU $c_1,$t_1 | |
1496 sltu $at,$c_1,$t_1 | |
1497 $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); | |
1498 $ADDU $t_2,$at | |
1499 $ADDU $c_2,$t_2 | |
1500 sltu $at,$c_2,$t_2 | |
1501 $ADDU $c_3,$at | |
1502 mflo $t_1 | |
1503 mfhi $t_2 | |
1504 $ADDU $c_1,$t_1 | |
1505 sltu $at,$c_1,$t_1 | |
1506 $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); | |
1507 $ADDU $t_2,$at | |
1508 $ADDU $c_2,$t_2 | |
1509 sltu $at,$c_2,$t_2 | |
1510 $ADDU $c_3,$at | |
1511 mflo $t_1 | |
1512 mfhi $t_2 | |
1513 $ADDU $c_1,$t_1 | |
1514 sltu $at,$c_1,$t_1 | |
1515 $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); | |
1516 $ADDU $t_2,$at | |
1517 $ADDU $c_2,$t_2 | |
1518 sltu $at,$c_2,$t_2 | |
1519 $ADDU $c_3,$at | |
1520 $ST $c_1,9*$BNSZ($a0) # r[9]=c1; | |
1521 | |
1522 mflo $t_1 | |
1523 mfhi $t_2 | |
1524 $ADDU $c_2,$t_1 | |
1525 sltu $at,$c_2,$t_1 | |
1526 $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); | |
1527 $ADDU $t_2,$at | |
1528 $ADDU $c_3,$t_2 | |
1529 sltu $c_1,$c_3,$t_2 | |
1530 mflo $t_1 | |
1531 mfhi $t_2 | |
1532 $ADDU $c_2,$t_1 | |
1533 sltu $at,$c_2,$t_1 | |
1534 $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); | |
1535 $ADDU $t_2,$at | |
1536 $ADDU $c_3,$t_2 | |
1537 sltu $at,$c_3,$t_2 | |
1538 $ADDU $c_1,$at | |
1539 mflo $t_1 | |
1540 mfhi $t_2 | |
1541 $ADDU $c_2,$t_1 | |
1542 sltu $at,$c_2,$t_1 | |
1543 $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); | |
1544 $ADDU $t_2,$at | |
1545 $ADDU $c_3,$t_2 | |
1546 sltu $at,$c_3,$t_2 | |
1547 $ADDU $c_1,$at | |
1548 mflo $t_1 | |
1549 mfhi $t_2 | |
1550 $ADDU $c_2,$t_1 | |
1551 sltu $at,$c_2,$t_1 | |
1552 $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); | |
1553 $ADDU $t_2,$at | |
1554 $ADDU $c_3,$t_2 | |
1555 sltu $at,$c_3,$t_2 | |
1556 $ADDU $c_1,$at | |
1557 mflo $t_1 | |
1558 mfhi $t_2 | |
1559 $ADDU $c_2,$t_1 | |
1560 sltu $at,$c_2,$t_1 | |
1561 $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); | |
1562 $ADDU $t_2,$at | |
1563 $ADDU $c_3,$t_2 | |
1564 sltu $at,$c_3,$t_2 | |
1565 $ADDU $c_1,$at | |
1566 $ST $c_2,10*$BNSZ($a0) # r[10]=c2; | |
1567 | |
1568 mflo $t_1 | |
1569 mfhi $t_2 | |
1570 $ADDU $c_3,$t_1 | |
1571 sltu $at,$c_3,$t_1 | |
1572 $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); | |
1573 $ADDU $t_2,$at | |
1574 $ADDU $c_1,$t_2 | |
1575 sltu $c_2,$c_1,$t_2 | |
1576 mflo $t_1 | |
1577 mfhi $t_2 | |
1578 $ADDU $c_3,$t_1 | |
1579 sltu $at,$c_3,$t_1 | |
1580 $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); | |
1581 $ADDU $t_2,$at | |
1582 $ADDU $c_1,$t_2 | |
1583 sltu $at,$c_1,$t_2 | |
1584 $ADDU $c_2,$at | |
1585 mflo $t_1 | |
1586 mfhi $t_2 | |
1587 $ADDU $c_3,$t_1 | |
1588 sltu $at,$c_3,$t_1 | |
1589 $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); | |
1590 $ADDU $t_2,$at | |
1591 $ADDU $c_1,$t_2 | |
1592 sltu $at,$c_1,$t_2 | |
1593 $ADDU $c_2,$at | |
1594 mflo $t_1 | |
1595 mfhi $t_2 | |
1596 $ADDU $c_3,$t_1 | |
1597 sltu $at,$c_3,$t_1 | |
1598 $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); | |
1599 $ADDU $t_2,$at | |
1600 $ADDU $c_1,$t_2 | |
1601 sltu $at,$c_1,$t_2 | |
1602 $ADDU $c_2,$at | |
1603 $ST $c_3,11*$BNSZ($a0) # r[11]=c3; | |
1604 | |
1605 mflo $t_1 | |
1606 mfhi $t_2 | |
1607 $ADDU $c_1,$t_1 | |
1608 sltu $at,$c_1,$t_1 | |
1609 $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); | |
1610 $ADDU $t_2,$at | |
1611 $ADDU $c_2,$t_2 | |
1612 sltu $c_3,$c_2,$t_2 | |
1613 mflo $t_1 | |
1614 mfhi $t_2 | |
1615 $ADDU $c_1,$t_1 | |
1616 sltu $at,$c_1,$t_1 | |
1617 $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); | |
1618 $ADDU $t_2,$at | |
1619 $ADDU $c_2,$t_2 | |
1620 sltu $at,$c_2,$t_2 | |
1621 $ADDU $c_3,$at | |
1622 mflo $t_1 | |
1623 mfhi $t_2 | |
1624 $ADDU $c_1,$t_1 | |
1625 sltu $at,$c_1,$t_1 | |
1626 $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); | |
1627 $ADDU $t_2,$at | |
1628 $ADDU $c_2,$t_2 | |
1629 sltu $at,$c_2,$t_2 | |
1630 $ADDU $c_3,$at | |
1631 $ST $c_1,12*$BNSZ($a0) # r[12]=c1; | |
1632 | |
1633 mflo $t_1 | |
1634 mfhi $t_2 | |
1635 $ADDU $c_2,$t_1 | |
1636 sltu $at,$c_2,$t_1 | |
1637 $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); | |
1638 $ADDU $t_2,$at | |
1639 $ADDU $c_3,$t_2 | |
1640 sltu $c_1,$c_3,$t_2 | |
1641 mflo $t_1 | |
1642 mfhi $t_2 | |
1643 $ADDU $c_2,$t_1 | |
1644 sltu $at,$c_2,$t_1 | |
1645 $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); | |
1646 $ADDU $t_2,$at | |
1647 $ADDU $c_3,$t_2 | |
1648 sltu $at,$c_3,$t_2 | |
1649 $ADDU $c_1,$at | |
1650 $ST $c_2,13*$BNSZ($a0) # r[13]=c2; | |
1651 | |
1652 mflo $t_1 | |
1653 mfhi $t_2 | |
1654 $ADDU $c_3,$t_1 | |
1655 sltu $at,$c_3,$t_1 | |
1656 $ADDU $t_2,$at | |
1657 $ADDU $c_1,$t_2 | |
1658 $ST $c_3,14*$BNSZ($a0) # r[14]=c3; | |
1659 $ST $c_1,15*$BNSZ($a0) # r[15]=c1; | |
1660 | |
1661 .set noreorder | |
1662 ___ | |
# bn_mul_comba8 epilogue, "nubi" flavour: reload $s0-$s5 plus $t0-$t3 and
# $gp (treated as callee-saved under this flavour — presumably mirroring
# the function's prologue save layout, which is outside this view) and
# return.  The 12-slot frame is released by $PTR_ADD in the delay slot of
# "jr $ra"; the code is under ".set noreorder" at this point, so the
# assembler will not reorder around the branch.
1663 $code.=<<___ if ($flavour =~ /nubi/i);
1664 	$REG_L	$s5,10*$SZREG($sp)
1665 	$REG_L	$s4,9*$SZREG($sp)
1666 	$REG_L	$s3,8*$SZREG($sp)
1667 	$REG_L	$s2,7*$SZREG($sp)
1668 	$REG_L	$s1,6*$SZREG($sp)
1669 	$REG_L	$s0,5*$SZREG($sp)
1670 	$REG_L	$t3,4*$SZREG($sp)
1671 	$REG_L	$t2,3*$SZREG($sp)
1672 	$REG_L	$t1,2*$SZREG($sp)
1673 	$REG_L	$t0,1*$SZREG($sp)
1674 	$REG_L	$gp,0*$SZREG($sp)
1675 	jr	$ra
1676 	$PTR_ADD $sp,12*$SZREG
1677 ___
# bn_mul_comba8 epilogue for the standard (non-nubi) ABIs: only the
# conventional callee-saved $s0-$s5 were spilled, so restore those six
# slots and return, popping the frame in the "jr $ra" delay slot.
1678 $code.=<<___ if ($flavour !~ /nubi/i);
1679 	$REG_L	$s5,5*$SZREG($sp)
1680 	$REG_L	$s4,4*$SZREG($sp)
1681 	$REG_L	$s3,3*$SZREG($sp)
1682 	$REG_L	$s2,2*$SZREG($sp)
1683 	$REG_L	$s1,1*$SZREG($sp)
1684 	$REG_L	$s0,0*$SZREG($sp)
1685 	jr	$ra
1686 	$PTR_ADD $sp,6*$SZREG
1687 ___
# Close bn_mul_comba8 and open the bn_mul_comba4 entry point
# (r[0..7] = a[0..3] * b[0..3]); 32-byte aligned, exported symbol.
1688 $code.=<<___;
1689 .end	bn_mul_comba8
1690 
1691 .align	5
1692 .globl	bn_mul_comba4
1693 .ent	bn_mul_comba4
1694 bn_mul_comba4:
1695 ___
# nubi-only prologue for bn_mul_comba4: allocate a 6-slot frame and spill
# $ra, $t0-$t3 and $gp (save set matches the .mask bits 0x8000f008).
# Non-nubi flavours need no frame here at all, hence the conditional.
1696 $code.=<<___ if ($flavour =~ /nubi/i);
1697 	.frame	$sp,6*$SZREG,$ra
1698 	.mask	0x8000f008,-$SZREG
1699 	.set	noreorder
1700 	$PTR_SUB $sp,6*$SZREG
1701 	$REG_S	$ra,5*$SZREG($sp)
1702 	$REG_S	$t3,4*$SZREG($sp)
1703 	$REG_S	$t2,3*$SZREG($sp)
1704 	$REG_S	$t1,2*$SZREG($sp)
1705 	$REG_S	$t0,1*$SZREG($sp)
1706 	$REG_S	$gp,0*$SZREG($sp)
1707 ___
# Fully unrolled 4x4 comba multiplication: all of a[0..3]/b[0..3] are
# preloaded into registers, then each result word r[k] is the column sum
# of a[i]*b[j] with i+j==k.  The running three-word accumulator rotates
# through $c_1/$c_2/$c_3; carries are recovered with "sltu dst,sum,addend"
# after each $ADDU (dst < addend iff the unsigned add wrapped).  Note the
# next column's $MULTU is issued before the previous product's carry
# propagation finishes, overlapping multiplier latency with the ALU work.
1708 $code.=<<___;
1709 	.set	reorder
1710 	$LD	$a_0,0($a1)
1711 	$LD	$b_0,0($a2)
1712 	$LD	$a_1,$BNSZ($a1)
1713 	$LD	$a_2,2*$BNSZ($a1)
1714 	$MULTU	$a_0,$b_0		# mul_add_c(a[0],b[0],c1,c2,c3);
1715 	$LD	$a_3,3*$BNSZ($a1)
1716 	$LD	$b_1,$BNSZ($a2)
1717 	$LD	$b_2,2*$BNSZ($a2)
1718 	$LD	$b_3,3*$BNSZ($a2)
1719 	mflo	$c_1
1720 	mfhi	$c_2
1721 	$ST	$c_1,0($a0)
1722 
1723 	$MULTU	$a_0,$b_1		# mul_add_c(a[0],b[1],c2,c3,c1);
1724 	mflo	$t_1
1725 	mfhi	$t_2
1726 	$ADDU	$c_2,$t_1
1727 	sltu	$at,$c_2,$t_1
1728 	$MULTU	$a_1,$b_0		# mul_add_c(a[1],b[0],c2,c3,c1);
1729 	$ADDU	$c_3,$t_2,$at
1730 	mflo	$t_1
1731 	mfhi	$t_2
1732 	$ADDU	$c_2,$t_1
1733 	sltu	$at,$c_2,$t_1
1734 	$MULTU	$a_2,$b_0		# mul_add_c(a[2],b[0],c3,c1,c2);
1735 	$ADDU	$t_2,$at
1736 	$ADDU	$c_3,$t_2
1737 	sltu	$c_1,$c_3,$t_2
1738 	$ST	$c_2,$BNSZ($a0)
1739 
1740 	mflo	$t_1
1741 	mfhi	$t_2
1742 	$ADDU	$c_3,$t_1
1743 	sltu	$at,$c_3,$t_1
1744 	$MULTU	$a_1,$b_1		# mul_add_c(a[1],b[1],c3,c1,c2);
1745 	$ADDU	$t_2,$at
1746 	$ADDU	$c_1,$t_2
1747 	mflo	$t_1
1748 	mfhi	$t_2
1749 	$ADDU	$c_3,$t_1
1750 	sltu	$at,$c_3,$t_1
1751 	$MULTU	$a_0,$b_2		# mul_add_c(a[0],b[2],c3,c1,c2);
1752 	$ADDU	$t_2,$at
1753 	$ADDU	$c_1,$t_2
1754 	sltu	$c_2,$c_1,$t_2
1755 	mflo	$t_1
1756 	mfhi	$t_2
1757 	$ADDU	$c_3,$t_1
1758 	sltu	$at,$c_3,$t_1
1759 	$MULTU	$a_0,$b_3		# mul_add_c(a[0],b[3],c1,c2,c3);
1760 	$ADDU	$t_2,$at
1761 	$ADDU	$c_1,$t_2
1762 	sltu	$at,$c_1,$t_2
1763 	$ADDU	$c_2,$at
1764 	$ST	$c_3,2*$BNSZ($a0)
1765 
1766 	mflo	$t_1
1767 	mfhi	$t_2
1768 	$ADDU	$c_1,$t_1
1769 	sltu	$at,$c_1,$t_1
1770 	$MULTU	$a_1,$b_2		# mul_add_c(a[1],b[2],c1,c2,c3);
1771 	$ADDU	$t_2,$at
1772 	$ADDU	$c_2,$t_2
1773 	sltu	$c_3,$c_2,$t_2
1774 	mflo	$t_1
1775 	mfhi	$t_2
1776 	$ADDU	$c_1,$t_1
1777 	sltu	$at,$c_1,$t_1
1778 	$MULTU	$a_2,$b_1		# mul_add_c(a[2],b[1],c1,c2,c3);
1779 	$ADDU	$t_2,$at
1780 	$ADDU	$c_2,$t_2
1781 	sltu	$at,$c_2,$t_2
1782 	$ADDU	$c_3,$at
1783 	mflo	$t_1
1784 	mfhi	$t_2
1785 	$ADDU	$c_1,$t_1
1786 	sltu	$at,$c_1,$t_1
1787 	$MULTU	$a_3,$b_0		# mul_add_c(a[3],b[0],c1,c2,c3);
1788 	$ADDU	$t_2,$at
1789 	$ADDU	$c_2,$t_2
1790 	sltu	$at,$c_2,$t_2
1791 	$ADDU	$c_3,$at
1792 	mflo	$t_1
1793 	mfhi	$t_2
1794 	$ADDU	$c_1,$t_1
1795 	sltu	$at,$c_1,$t_1
1796 	$MULTU	$a_3,$b_1		# mul_add_c(a[3],b[1],c2,c3,c1);
1797 	$ADDU	$t_2,$at
1798 	$ADDU	$c_2,$t_2
1799 	sltu	$at,$c_2,$t_2
1800 	$ADDU	$c_3,$at
1801 	$ST	$c_1,3*$BNSZ($a0)
1802 
1803 	mflo	$t_1
1804 	mfhi	$t_2
1805 	$ADDU	$c_2,$t_1
1806 	sltu	$at,$c_2,$t_1
1807 	$MULTU	$a_2,$b_2		# mul_add_c(a[2],b[2],c2,c3,c1);
1808 	$ADDU	$t_2,$at
1809 	$ADDU	$c_3,$t_2
1810 	sltu	$c_1,$c_3,$t_2
1811 	mflo	$t_1
1812 	mfhi	$t_2
1813 	$ADDU	$c_2,$t_1
1814 	sltu	$at,$c_2,$t_1
1815 	$MULTU	$a_1,$b_3		# mul_add_c(a[1],b[3],c2,c3,c1);
1816 	$ADDU	$t_2,$at
1817 	$ADDU	$c_3,$t_2
1818 	sltu	$at,$c_3,$t_2
1819 	$ADDU	$c_1,$at
1820 	mflo	$t_1
1821 	mfhi	$t_2
1822 	$ADDU	$c_2,$t_1
1823 	sltu	$at,$c_2,$t_1
1824 	$MULTU	$a_2,$b_3		# mul_add_c(a[2],b[3],c3,c1,c2);
1825 	$ADDU	$t_2,$at
1826 	$ADDU	$c_3,$t_2
1827 	sltu	$at,$c_3,$t_2
1828 	$ADDU	$c_1,$at
1829 	$ST	$c_2,4*$BNSZ($a0)
1830 
1831 	mflo	$t_1
1832 	mfhi	$t_2
1833 	$ADDU	$c_3,$t_1
1834 	sltu	$at,$c_3,$t_1
1835 	$MULTU	$a_3,$b_2		# mul_add_c(a[3],b[2],c3,c1,c2);
1836 	$ADDU	$t_2,$at
1837 	$ADDU	$c_1,$t_2
1838 	sltu	$c_2,$c_1,$t_2
1839 	mflo	$t_1
1840 	mfhi	$t_2
1841 	$ADDU	$c_3,$t_1
1842 	sltu	$at,$c_3,$t_1
1843 	$MULTU	$a_3,$b_3		# mul_add_c(a[3],b[3],c1,c2,c3);
1844 	$ADDU	$t_2,$at
1845 	$ADDU	$c_1,$t_2
1846 	sltu	$at,$c_1,$t_2
1847 	$ADDU	$c_2,$at
1848 	$ST	$c_3,5*$BNSZ($a0)
1849 
1850 	mflo	$t_1
1851 	mfhi	$t_2
1852 	$ADDU	$c_1,$t_1
1853 	sltu	$at,$c_1,$t_1
1854 	$ADDU	$t_2,$at
1855 	$ADDU	$c_2,$t_2
1856 	$ST	$c_1,6*$BNSZ($a0)
1857 	$ST	$c_2,7*$BNSZ($a0)
1858 
1859 	.set	noreorder
1860 ___
# nubi-only: undo the prologue above — reload $t0-$t3 and $gp and pop the
# 6-slot frame before falling through to the shared "jr $ra" below.
# ($ra itself is not reloaded: this leaf function never clobbers it.)
1861 $code.=<<___ if ($flavour =~ /nubi/i);
1862 	$REG_L	$t3,4*$SZREG($sp)
1863 	$REG_L	$t2,3*$SZREG($sp)
1864 	$REG_L	$t1,2*$SZREG($sp)
1865 	$REG_L	$t0,1*$SZREG($sp)
1866 	$REG_L	$gp,0*$SZREG($sp)
1867 	$PTR_ADD $sp,6*$SZREG
1868 ___
# Common return path for bn_mul_comba4 (under .set noreorder, so the nop
# explicitly fills the branch delay slot) and close the procedure.
1869 $code.=<<___;
1870 	jr	$ra
1871 	nop
1872 .end	bn_mul_comba4
1873 ___
1874 | |
# Squaring takes a single input array, so the registers that held b[0..3]
# in the multiply routines are re-aliased to carry a[4..7] from here on.
1875 ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
1876 | |
# Entry point for bn_sqr_comba8 (r[0..15] = a[0..7]^2); 32-byte aligned,
# exported symbol.
1877 $code.=<<___;
1878 
1879 .align	5
1880 .globl	bn_sqr_comba8
1881 .ent	bn_sqr_comba8
1882 bn_sqr_comba8:
1883 ___
# nubi-only prologue for bn_sqr_comba8: identical frame/save set to
# bn_mul_comba4's prologue — 6 slots for $ra, $t0-$t3 and $gp
# (.mask 0x8000f008).  Other flavours run frameless.
1884 $code.=<<___ if ($flavour =~ /nubi/i);
1885 	.frame	$sp,6*$SZREG,$ra
1886 	.mask	0x8000f008,-$SZREG
1887 	.set	noreorder
1888 	$PTR_SUB $sp,6*$SZREG
1889 	$REG_S	$ra,5*$SZREG($sp)
1890 	$REG_S	$t3,4*$SZREG($sp)
1891 	$REG_S	$t2,3*$SZREG($sp)
1892 	$REG_S	$t1,2*$SZREG($sp)
1893 	$REG_S	$t0,1*$SZREG($sp)
1894 	$REG_S	$gp,0*$SZREG($sp)
1895 ___
# Fully unrolled 8-word comba squaring.  All eight input words are
# preloaded from ($a1); off-diagonal products a[i]*a[j] (i!=j) appear
# twice in the square, so the mul_add_c2 sequences double each 2*BNSZ-bit
# product in registers: "slt dst,$t_x,$zero" captures the sign/top bit of
# each half BEFORE "$SLL ...,1" shifts it out, and that bit is folded into
# the next-higher word — no doubled bit is lost.  Diagonal terms
# a[i]*a[i] use the plain mul_add_c pattern.  Since the b-array pointer
# $a2 is never needed after the loads, it is reused as a scratch register
# for those captured top bits.  Carry recovery is the usual
# "sltu at,sum,addend" after each $ADDU, with the accumulator rotating
# through $c_1/$c_2/$c_3.
1896 $code.=<<___;
1897 	.set	reorder
1898 	$LD	$a_0,0($a1)
1899 	$LD	$a_1,$BNSZ($a1)
1900 	$LD	$a_2,2*$BNSZ($a1)
1901 	$LD	$a_3,3*$BNSZ($a1)
1902 
1903 	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
1904 	$LD	$a_4,4*$BNSZ($a1)
1905 	$LD	$a_5,5*$BNSZ($a1)
1906 	$LD	$a_6,6*$BNSZ($a1)
1907 	$LD	$a_7,7*$BNSZ($a1)
1908 	mflo	$c_1
1909 	mfhi	$c_2
1910 	$ST	$c_1,0($a0)
1911 
1912 	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
1913 	mflo	$t_1
1914 	mfhi	$t_2
1915 	slt	$c_1,$t_2,$zero
1916 	$SLL	$t_2,1
1917 	$MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
1918 	slt	$a2,$t_1,$zero
1919 	$ADDU	$t_2,$a2
1920 	$SLL	$t_1,1
1921 	$ADDU	$c_2,$t_1
1922 	sltu	$at,$c_2,$t_1
1923 	$ADDU	$c_3,$t_2,$at
1924 	$ST	$c_2,$BNSZ($a0)
1925 
1926 	mflo	$t_1
1927 	mfhi	$t_2
1928 	slt	$c_2,$t_2,$zero
1929 	$SLL	$t_2,1
1930 	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
1931 	slt	$a2,$t_1,$zero
1932 	$ADDU	$t_2,$a2
1933 	$SLL	$t_1,1
1934 	$ADDU	$c_3,$t_1
1935 	sltu	$at,$c_3,$t_1
1936 	$ADDU	$t_2,$at
1937 	$ADDU	$c_1,$t_2
1938 	sltu	$at,$c_1,$t_2
1939 	$ADDU	$c_2,$at
1940 	mflo	$t_1
1941 	mfhi	$t_2
1942 	$ADDU	$c_3,$t_1
1943 	sltu	$at,$c_3,$t_1
1944 	$MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
1945 	$ADDU	$t_2,$at
1946 	$ADDU	$c_1,$t_2
1947 	sltu	$at,$c_1,$t_2
1948 	$ADDU	$c_2,$at
1949 	$ST	$c_3,2*$BNSZ($a0)
1950 
1951 	mflo	$t_1
1952 	mfhi	$t_2
1953 	slt	$c_3,$t_2,$zero
1954 	$SLL	$t_2,1
1955 	$MULTU	$a_1,$a_2		# mul_add_c2(a[1],b[2],c1,c2,c3);
1956 	slt	$a2,$t_1,$zero
1957 	$ADDU	$t_2,$a2
1958 	$SLL	$t_1,1
1959 	$ADDU	$c_1,$t_1
1960 	sltu	$at,$c_1,$t_1
1961 	$ADDU	$t_2,$at
1962 	$ADDU	$c_2,$t_2
1963 	sltu	$at,$c_2,$t_2
1964 	$ADDU	$c_3,$at
1965 	mflo	$t_1
1966 	mfhi	$t_2
1967 	slt	$at,$t_2,$zero
1968 	$ADDU	$c_3,$at
1969 	$MULTU	$a_4,$a_0		# mul_add_c2(a[4],b[0],c2,c3,c1);
1970 	$SLL	$t_2,1
1971 	slt	$a2,$t_1,$zero
1972 	$ADDU	$t_2,$a2
1973 	$SLL	$t_1,1
1974 	$ADDU	$c_1,$t_1
1975 	sltu	$at,$c_1,$t_1
1976 	$ADDU	$t_2,$at
1977 	$ADDU	$c_2,$t_2
1978 	sltu	$at,$c_2,$t_2
1979 	$ADDU	$c_3,$at
1980 	$ST	$c_1,3*$BNSZ($a0)
1981 
1982 	mflo	$t_1
1983 	mfhi	$t_2
1984 	slt	$c_1,$t_2,$zero
1985 	$SLL	$t_2,1
1986 	$MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
1987 	slt	$a2,$t_1,$zero
1988 	$ADDU	$t_2,$a2
1989 	$SLL	$t_1,1
1990 	$ADDU	$c_2,$t_1
1991 	sltu	$at,$c_2,$t_1
1992 	$ADDU	$t_2,$at
1993 	$ADDU	$c_3,$t_2
1994 	sltu	$at,$c_3,$t_2
1995 	$ADDU	$c_1,$at
1996 	mflo	$t_1
1997 	mfhi	$t_2
1998 	slt	$at,$t_2,$zero
1999 	$ADDU	$c_1,$at
2000 	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
2001 	$SLL	$t_2,1
2002 	slt	$a2,$t_1,$zero
2003 	$ADDU	$t_2,$a2
2004 	$SLL	$t_1,1
2005 	$ADDU	$c_2,$t_1
2006 	sltu	$at,$c_2,$t_1
2007 	$ADDU	$t_2,$at
2008 	$ADDU	$c_3,$t_2
2009 	sltu	$at,$c_3,$t_2
2010 	$ADDU	$c_1,$at
2011 	mflo	$t_1
2012 	mfhi	$t_2
2013 	$ADDU	$c_2,$t_1
2014 	sltu	$at,$c_2,$t_1
2015 	$MULTU	$a_0,$a_5		# mul_add_c2(a[0],b[5],c3,c1,c2);
2016 	$ADDU	$t_2,$at
2017 	$ADDU	$c_3,$t_2
2018 	sltu	$at,$c_3,$t_2
2019 	$ADDU	$c_1,$at
2020 	$ST	$c_2,4*$BNSZ($a0)
2021 
2022 	mflo	$t_1
2023 	mfhi	$t_2
2024 	slt	$c_2,$t_2,$zero
2025 	$SLL	$t_2,1
2026 	$MULTU	$a_1,$a_4		# mul_add_c2(a[1],b[4],c3,c1,c2);
2027 	slt	$a2,$t_1,$zero
2028 	$ADDU	$t_2,$a2
2029 	$SLL	$t_1,1
2030 	$ADDU	$c_3,$t_1
2031 	sltu	$at,$c_3,$t_1
2032 	$ADDU	$t_2,$at
2033 	$ADDU	$c_1,$t_2
2034 	sltu	$at,$c_1,$t_2
2035 	$ADDU	$c_2,$at
2036 	mflo	$t_1
2037 	mfhi	$t_2
2038 	slt	$at,$t_2,$zero
2039 	$ADDU	$c_2,$at
2040 	$MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
2041 	$SLL	$t_2,1
2042 	slt	$a2,$t_1,$zero
2043 	$ADDU	$t_2,$a2
2044 	$SLL	$t_1,1
2045 	$ADDU	$c_3,$t_1
2046 	sltu	$at,$c_3,$t_1
2047 	$ADDU	$t_2,$at
2048 	$ADDU	$c_1,$t_2
2049 	sltu	$at,$c_1,$t_2
2050 	$ADDU	$c_2,$at
2051 	mflo	$t_1
2052 	mfhi	$t_2
2053 	slt	$at,$t_2,$zero
2054 	$MULTU	$a_6,$a_0		# mul_add_c2(a[6],b[0],c1,c2,c3);
2055 	$ADDU	$c_2,$at
2056 	$SLL	$t_2,1
2057 	slt	$a2,$t_1,$zero
2058 	$ADDU	$t_2,$a2
2059 	$SLL	$t_1,1
2060 	$ADDU	$c_3,$t_1
2061 	sltu	$at,$c_3,$t_1
2062 	$ADDU	$t_2,$at
2063 	$ADDU	$c_1,$t_2
2064 	sltu	$at,$c_1,$t_2
2065 	$ADDU	$c_2,$at
2066 	$ST	$c_3,5*$BNSZ($a0)
2067 
2068 	mflo	$t_1
2069 	mfhi	$t_2
2070 	slt	$c_3,$t_2,$zero
2071 	$SLL	$t_2,1
2072 	$MULTU	$a_5,$a_1		# mul_add_c2(a[5],b[1],c1,c2,c3);
2073 	slt	$a2,$t_1,$zero
2074 	$ADDU	$t_2,$a2
2075 	$SLL	$t_1,1
2076 	$ADDU	$c_1,$t_1
2077 	sltu	$at,$c_1,$t_1
2078 	$ADDU	$t_2,$at
2079 	$ADDU	$c_2,$t_2
2080 	sltu	$at,$c_2,$t_2
2081 	$ADDU	$c_3,$at
2082 	mflo	$t_1
2083 	mfhi	$t_2
2084 	slt	$at,$t_2,$zero
2085 	$ADDU	$c_3,$at
2086 	$MULTU	$a_4,$a_2		# mul_add_c2(a[4],b[2],c1,c2,c3);
2087 	$SLL	$t_2,1
2088 	slt	$a2,$t_1,$zero
2089 	$ADDU	$t_2,$a2
2090 	$SLL	$t_1,1
2091 	$ADDU	$c_1,$t_1
2092 	sltu	$at,$c_1,$t_1
2093 	$ADDU	$t_2,$at
2094 	$ADDU	$c_2,$t_2
2095 	sltu	$at,$c_2,$t_2
2096 	$ADDU	$c_3,$at
2097 	mflo	$t_1
2098 	mfhi	$t_2
2099 	slt	$at,$t_2,$zero
2100 	$ADDU	$c_3,$at
2101 	$MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
2102 	$SLL	$t_2,1
2103 	slt	$a2,$t_1,$zero
2104 	$ADDU	$t_2,$a2
2105 	$SLL	$t_1,1
2106 	$ADDU	$c_1,$t_1
2107 	sltu	$at,$c_1,$t_1
2108 	$ADDU	$t_2,$at
2109 	$ADDU	$c_2,$t_2
2110 	sltu	$at,$c_2,$t_2
2111 	$ADDU	$c_3,$at
2112 	mflo	$t_1
2113 	mfhi	$t_2
2114 	$ADDU	$c_1,$t_1
2115 	sltu	$at,$c_1,$t_1
2116 	$MULTU	$a_0,$a_7		# mul_add_c2(a[0],b[7],c2,c3,c1);
2117 	$ADDU	$t_2,$at
2118 	$ADDU	$c_2,$t_2
2119 	sltu	$at,$c_2,$t_2
2120 	$ADDU	$c_3,$at
2121 	$ST	$c_1,6*$BNSZ($a0)
2122 
2123 	mflo	$t_1
2124 	mfhi	$t_2
2125 	slt	$c_1,$t_2,$zero
2126 	$SLL	$t_2,1
2127 	$MULTU	$a_1,$a_6		# mul_add_c2(a[1],b[6],c2,c3,c1);
2128 	slt	$a2,$t_1,$zero
2129 	$ADDU	$t_2,$a2
2130 	$SLL	$t_1,1
2131 	$ADDU	$c_2,$t_1
2132 	sltu	$at,$c_2,$t_1
2133 	$ADDU	$t_2,$at
2134 	$ADDU	$c_3,$t_2
2135 	sltu	$at,$c_3,$t_2
2136 	$ADDU	$c_1,$at
2137 	mflo	$t_1
2138 	mfhi	$t_2
2139 	slt	$at,$t_2,$zero
2140 	$ADDU	$c_1,$at
2141 	$MULTU	$a_2,$a_5		# mul_add_c2(a[2],b[5],c2,c3,c1);
2142 	$SLL	$t_2,1
2143 	slt	$a2,$t_1,$zero
2144 	$ADDU	$t_2,$a2
2145 	$SLL	$t_1,1
2146 	$ADDU	$c_2,$t_1
2147 	sltu	$at,$c_2,$t_1
2148 	$ADDU	$t_2,$at
2149 	$ADDU	$c_3,$t_2
2150 	sltu	$at,$c_3,$t_2
2151 	$ADDU	$c_1,$at
2152 	mflo	$t_1
2153 	mfhi	$t_2
2154 	slt	$at,$t_2,$zero
2155 	$ADDU	$c_1,$at
2156 	$MULTU	$a_3,$a_4		# mul_add_c2(a[3],b[4],c2,c3,c1);
2157 	$SLL	$t_2,1
2158 	slt	$a2,$t_1,$zero
2159 	$ADDU	$t_2,$a2
2160 	$SLL	$t_1,1
2161 	$ADDU	$c_2,$t_1
2162 	sltu	$at,$c_2,$t_1
2163 	$ADDU	$t_2,$at
2164 	$ADDU	$c_3,$t_2
2165 	sltu	$at,$c_3,$t_2
2166 	$ADDU	$c_1,$at
2167 	mflo	$t_1
2168 	mfhi	$t_2
2169 	slt	$at,$t_2,$zero
2170 	$ADDU	$c_1,$at
2171 	$MULTU	$a_7,$a_1		# mul_add_c2(a[7],b[1],c3,c1,c2);
2172 	$SLL	$t_2,1
2173 	slt	$a2,$t_1,$zero
2174 	$ADDU	$t_2,$a2
2175 	$SLL	$t_1,1
2176 	$ADDU	$c_2,$t_1
2177 	sltu	$at,$c_2,$t_1
2178 	$ADDU	$t_2,$at
2179 	$ADDU	$c_3,$t_2
2180 	sltu	$at,$c_3,$t_2
2181 	$ADDU	$c_1,$at
2182 	$ST	$c_2,7*$BNSZ($a0)
2183 
2184 	mflo	$t_1
2185 	mfhi	$t_2
2186 	slt	$c_2,$t_2,$zero
2187 	$SLL	$t_2,1
2188 	$MULTU	$a_6,$a_2		# mul_add_c2(a[6],b[2],c3,c1,c2);
2189 	slt	$a2,$t_1,$zero
2190 	$ADDU	$t_2,$a2
2191 	$SLL	$t_1,1
2192 	$ADDU	$c_3,$t_1
2193 	sltu	$at,$c_3,$t_1
2194 	$ADDU	$t_2,$at
2195 	$ADDU	$c_1,$t_2
2196 	sltu	$at,$c_1,$t_2
2197 	$ADDU	$c_2,$at
2198 	mflo	$t_1
2199 	mfhi	$t_2
2200 	slt	$at,$t_2,$zero
2201 	$ADDU	$c_2,$at
2202 	$MULTU	$a_5,$a_3		# mul_add_c2(a[5],b[3],c3,c1,c2);
2203 	$SLL	$t_2,1
2204 	slt	$a2,$t_1,$zero
2205 	$ADDU	$t_2,$a2
2206 	$SLL	$t_1,1
2207 	$ADDU	$c_3,$t_1
2208 	sltu	$at,$c_3,$t_1
2209 	$ADDU	$t_2,$at
2210 	$ADDU	$c_1,$t_2
2211 	sltu	$at,$c_1,$t_2
2212 	$ADDU	$c_2,$at
2213 	mflo	$t_1
2214 	mfhi	$t_2
2215 	slt	$at,$t_2,$zero
2216 	$ADDU	$c_2,$at
2217 	$MULTU	$a_4,$a_4		# mul_add_c(a[4],b[4],c3,c1,c2);
2218 	$SLL	$t_2,1
2219 	slt	$a2,$t_1,$zero
2220 	$ADDU	$t_2,$a2
2221 	$SLL	$t_1,1
2222 	$ADDU	$c_3,$t_1
2223 	sltu	$at,$c_3,$t_1
2224 	$ADDU	$t_2,$at
2225 	$ADDU	$c_1,$t_2
2226 	sltu	$at,$c_1,$t_2
2227 	$ADDU	$c_2,$at
2228 	mflo	$t_1
2229 	mfhi	$t_2
2230 	$ADDU	$c_3,$t_1
2231 	sltu	$at,$c_3,$t_1
2232 	$MULTU	$a_2,$a_7		# mul_add_c2(a[2],b[7],c1,c2,c3);
2233 	$ADDU	$t_2,$at
2234 	$ADDU	$c_1,$t_2
2235 	sltu	$at,$c_1,$t_2
2236 	$ADDU	$c_2,$at
2237 	$ST	$c_3,8*$BNSZ($a0)
2238 
2239 	mflo	$t_1
2240 	mfhi	$t_2
2241 	slt	$c_3,$t_2,$zero
2242 	$SLL	$t_2,1
2243 	$MULTU	$a_3,$a_6		# mul_add_c2(a[3],b[6],c1,c2,c3);
2244 	slt	$a2,$t_1,$zero
2245 	$ADDU	$t_2,$a2
2246 	$SLL	$t_1,1
2247 	$ADDU	$c_1,$t_1
2248 	sltu	$at,$c_1,$t_1
2249 	$ADDU	$t_2,$at
2250 	$ADDU	$c_2,$t_2
2251 	sltu	$at,$c_2,$t_2
2252 	$ADDU	$c_3,$at
2253 	mflo	$t_1
2254 	mfhi	$t_2
2255 	slt	$at,$t_2,$zero
2256 	$ADDU	$c_3,$at
2257 	$MULTU	$a_4,$a_5		# mul_add_c2(a[4],b[5],c1,c2,c3);
2258 	$SLL	$t_2,1
2259 	slt	$a2,$t_1,$zero
2260 	$ADDU	$t_2,$a2
2261 	$SLL	$t_1,1
2262 	$ADDU	$c_1,$t_1
2263 	sltu	$at,$c_1,$t_1
2264 	$ADDU	$t_2,$at
2265 	$ADDU	$c_2,$t_2
2266 	sltu	$at,$c_2,$t_2
2267 	$ADDU	$c_3,$at
2268 	mflo	$t_1
2269 	mfhi	$t_2
2270 	slt	$at,$t_2,$zero
2271 	$ADDU	$c_3,$at
2272 	$MULTU	$a_7,$a_3		# mul_add_c2(a[7],b[3],c2,c3,c1);
2273 	$SLL	$t_2,1
2274 	slt	$a2,$t_1,$zero
2275 	$ADDU	$t_2,$a2
2276 	$SLL	$t_1,1
2277 	$ADDU	$c_1,$t_1
2278 	sltu	$at,$c_1,$t_1
2279 	$ADDU	$t_2,$at
2280 	$ADDU	$c_2,$t_2
2281 	sltu	$at,$c_2,$t_2
2282 	$ADDU	$c_3,$at
2283 	$ST	$c_1,9*$BNSZ($a0)
2284 
2285 	mflo	$t_1
2286 	mfhi	$t_2
2287 	slt	$c_1,$t_2,$zero
2288 	$SLL	$t_2,1
2289 	$MULTU	$a_6,$a_4		# mul_add_c2(a[6],b[4],c2,c3,c1);
2290 	slt	$a2,$t_1,$zero
2291 	$ADDU	$t_2,$a2
2292 	$SLL	$t_1,1
2293 	$ADDU	$c_2,$t_1
2294 	sltu	$at,$c_2,$t_1
2295 	$ADDU	$t_2,$at
2296 	$ADDU	$c_3,$t_2
2297 	sltu	$at,$c_3,$t_2
2298 	$ADDU	$c_1,$at
2299 	mflo	$t_1
2300 	mfhi	$t_2
2301 	slt	$at,$t_2,$zero
2302 	$ADDU	$c_1,$at
2303 	$MULTU	$a_5,$a_5		# mul_add_c(a[5],b[5],c2,c3,c1);
2304 	$SLL	$t_2,1
2305 	slt	$a2,$t_1,$zero
2306 	$ADDU	$t_2,$a2
2307 	$SLL	$t_1,1
2308 	$ADDU	$c_2,$t_1
2309 	sltu	$at,$c_2,$t_1
2310 	$ADDU	$t_2,$at
2311 	$ADDU	$c_3,$t_2
2312 	sltu	$at,$c_3,$t_2
2313 	$ADDU	$c_1,$at
2314 	mflo	$t_1
2315 	mfhi	$t_2
2316 	$ADDU	$c_2,$t_1
2317 	sltu	$at,$c_2,$t_1
2318 	$MULTU	$a_4,$a_7		# mul_add_c2(a[4],b[7],c3,c1,c2);
2319 	$ADDU	$t_2,$at
2320 	$ADDU	$c_3,$t_2
2321 	sltu	$at,$c_3,$t_2
2322 	$ADDU	$c_1,$at
2323 	$ST	$c_2,10*$BNSZ($a0)
2324 
2325 	mflo	$t_1
2326 	mfhi	$t_2
2327 	slt	$c_2,$t_2,$zero
2328 	$SLL	$t_2,1
2329 	$MULTU	$a_5,$a_6		# mul_add_c2(a[5],b[6],c3,c1,c2);
2330 	slt	$a2,$t_1,$zero
2331 	$ADDU	$t_2,$a2
2332 	$SLL	$t_1,1
2333 	$ADDU	$c_3,$t_1
2334 	sltu	$at,$c_3,$t_1
2335 	$ADDU	$t_2,$at
2336 	$ADDU	$c_1,$t_2
2337 	sltu	$at,$c_1,$t_2
2338 	$ADDU	$c_2,$at
2339 	mflo	$t_1
2340 	mfhi	$t_2
2341 	slt	$at,$t_2,$zero
2342 	$ADDU	$c_2,$at
2343 	$MULTU	$a_7,$a_5		# mul_add_c2(a[7],b[5],c1,c2,c3);
2344 	$SLL	$t_2,1
2345 	slt	$a2,$t_1,$zero
2346 	$ADDU	$t_2,$a2
2347 	$SLL	$t_1,1
2348 	$ADDU	$c_3,$t_1
2349 	sltu	$at,$c_3,$t_1
2350 	$ADDU	$t_2,$at
2351 	$ADDU	$c_1,$t_2
2352 	sltu	$at,$c_1,$t_2
2353 	$ADDU	$c_2,$at
2354 	$ST	$c_3,11*$BNSZ($a0)
2355 
2356 	mflo	$t_1
2357 	mfhi	$t_2
2358 	slt	$c_3,$t_2,$zero
2359 	$SLL	$t_2,1
2360 	$MULTU	$a_6,$a_6		# mul_add_c(a[6],b[6],c1,c2,c3);
2361 	slt	$a2,$t_1,$zero
2362 	$ADDU	$t_2,$a2
2363 	$SLL	$t_1,1
2364 	$ADDU	$c_1,$t_1
2365 	sltu	$at,$c_1,$t_1
2366 	$ADDU	$t_2,$at
2367 	$ADDU	$c_2,$t_2
2368 	sltu	$at,$c_2,$t_2
2369 	$ADDU	$c_3,$at
2370 	mflo	$t_1
2371 	mfhi	$t_2
2372 	$ADDU	$c_1,$t_1
2373 	sltu	$at,$c_1,$t_1
2374 	$MULTU	$a_6,$a_7		# mul_add_c2(a[6],b[7],c2,c3,c1);
2375 	$ADDU	$t_2,$at
2376 	$ADDU	$c_2,$t_2
2377 	sltu	$at,$c_2,$t_2
2378 	$ADDU	$c_3,$at
2379 	$ST	$c_1,12*$BNSZ($a0)
2380 
2381 	mflo	$t_1
2382 	mfhi	$t_2
2383 	slt	$c_1,$t_2,$zero
2384 	$SLL	$t_2,1
2385 	$MULTU	$a_7,$a_7		# mul_add_c(a[7],b[7],c3,c1,c2);
2386 	slt	$a2,$t_1,$zero
2387 	$ADDU	$t_2,$a2
2388 	$SLL	$t_1,1
2389 	$ADDU	$c_2,$t_1
2390 	sltu	$at,$c_2,$t_1
2391 	$ADDU	$t_2,$at
2392 	$ADDU	$c_3,$t_2
2393 	sltu	$at,$c_3,$t_2
2394 	$ADDU	$c_1,$at
2395 	$ST	$c_2,13*$BNSZ($a0)
2396 
2397 	mflo	$t_1
2398 	mfhi	$t_2
2399 	$ADDU	$c_3,$t_1
2400 	sltu	$at,$c_3,$t_1
2401 	$ADDU	$t_2,$at
2402 	$ADDU	$c_1,$t_2
2403 	$ST	$c_3,14*$BNSZ($a0)
2404 	$ST	$c_1,15*$BNSZ($a0)
2405 
2406 	.set	noreorder
2407 ___
# nubi ABI only: restore the callee-saved temporaries and $gp that the
# bn_sqr_comba8 prologue spilled, then pop the 6-register stack frame.
# (Other ABIs keep these registers caller-saved, so no epilogue is needed.)
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$t3,4*$SZREG($sp)
	$REG_L	$t2,3*$SZREG($sp)
	$REG_L	$t1,2*$SZREG($sp)
	$REG_L	$t0,1*$SZREG($sp)
	$REG_L	$gp,0*$SZREG($sp)
	$PTR_ADD $sp,6*$SZREG
___
# Return from bn_sqr_comba8 (nop fills the jr branch-delay slot), then
# open the bn_sqr_comba4 function: 4-word Comba squaring entry point.
$code.=<<___;
	jr	$ra
	nop
.end	bn_sqr_comba8

.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
___
# nubi ABI only: allocate a 6-register frame and save $ra, the
# callee-saved temporaries $t0-$t3 and $gp; the .frame/.mask directives
# describe that layout to the assembler/debugger.
$code.=<<___ if ($flavour =~ /nubi/i);
	.frame	$sp,6*$SZREG,$ra
	.mask	0x8000f008,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,6*$SZREG
	$REG_S	$ra,5*$SZREG($sp)
	$REG_S	$t3,4*$SZREG($sp)
	$REG_S	$t2,3*$SZREG($sp)
	$REG_S	$t1,2*$SZREG($sp)
	$REG_S	$t0,1*$SZREG($sp)
	$REG_S	$gp,0*$SZREG($sp)
___
# bn_sqr_comba4 body: r[0..7] ($a0) = a[0..3] ($a1) squared, Comba style.
# Off-diagonal products a[i]*a[j] (i != j) occur twice in a square and are
# doubled ("mul_add_c2"): the slt-against-$zero / $SLL pairs capture the
# bit shifted out of each half of the product and fold it into the carry
# chain.  Diagonal products a[i]*a[i] use plain "mul_add_c".
# NOTE(review): fixed the emitted comment on the a[1]*a[2] step — the code
# performs the doubled mul_add_c2 with operand a[1], not "mul_add_c(a2[1],…)".
$code.=<<___;
	.set	reorder
	$LD	$a_0,0($a1)
	$LD	$a_1,$BNSZ($a1)
	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
	$LD	$a_2,2*$BNSZ($a1)
	$LD	$a_3,3*$BNSZ($a1)
	mflo	$c_1
	mfhi	$c_2
	$ST	$c_1,0($a0)

	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	$t_1
	mfhi	$t_2
	slt	$c_1,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$ADDU	$c_3,$t_2,$at
	$ST	$c_2,$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_2,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
	$ADDU	$t_2,$at
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	$ST	$c_3,2*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_3,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_1,$a_2		# mul_add_c2(a[1],b[2],c1,c2,c3);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_2,$t_2
	sltu	$at,$c_2,$t_2
	$ADDU	$c_3,$at
	mflo	$t_1
	mfhi	$t_2
	slt	$at,$t_2,$zero
	$ADDU	$c_3,$at
	$MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
	$SLL	$t_2,1
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_2,$t_2
	sltu	$at,$c_2,$t_2
	$ADDU	$c_3,$at
	$ST	$c_1,3*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_1,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_3,$t_2
	sltu	$at,$c_3,$t_2
	$ADDU	$c_1,$at
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
	$ADDU	$t_2,$at
	$ADDU	$c_3,$t_2
	sltu	$at,$c_3,$t_2
	$ADDU	$c_1,$at
	$ST	$c_2,4*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_2,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	$ST	$c_3,5*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$t_2,$at
	$ADDU	$c_2,$t_2
	$ST	$c_1,6*$BNSZ($a0)
	$ST	$c_2,7*$BNSZ($a0)

	.set	noreorder
___
# nubi ABI only: restore the callee-saved temporaries and $gp saved by
# the bn_sqr_comba4 prologue, then release the 6-register frame.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$t3,4*$SZREG($sp)
	$REG_L	$t2,3*$SZREG($sp)
	$REG_L	$t1,2*$SZREG($sp)
	$REG_L	$t0,1*$SZREG($sp)
	$REG_L	$gp,0*$SZREG($sp)
	$PTR_ADD $sp,6*$SZREG
___
# Return from bn_sqr_comba4 (nop fills the jr branch-delay slot) and
# close the function with the matching .end directive.
$code.=<<___;
	jr	$ra
	nop
.end	bn_sqr_comba4
___
# Emit the accumulated assembly on STDOUT (the build system redirects it
# into the .s file).  STDOUT is buffered, so a failed write — full disk,
# broken pipe — may only be reported at close time; dying here prevents a
# silently truncated assembly file from reaching the assembler.
print $code;
close STDOUT or die "error closing STDOUT: $!";
OLD | NEW |