OLD | NEW |
| (Empty) |
1 .set mips2 | |
2 .rdata | |
3 .asciiz "mips3.s, Version 1.2" | |
4 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | |
5 | |
6 .text | |
7 .set noat | |
8 | |
9 .align 5 | |
# bn_mul_add_words: exported entry point.
# If the word count in $6 is greater than zero, control transfers to
# bn_mul_add_words_internal; otherwise it returns at once with $2 = 0
# (and $4 = 0).
10 .globl bn_mul_add_words | |
11 .ent bn_mul_add_words | |
12 bn_mul_add_words: | |
13 .set noreorder | |
14 bgtz $6,bn_mul_add_words_internal | |
# Branch delay slot: runs on both paths, so the worker starts with carry $2 = 0.
15 move $2,$0 | |
16 jr $31 | |
# Return delay slot: mirrors the result in $2 into $4.
17 move $4,$2 | |
18 .end bn_mul_add_words | |
19 | |
20 .align 5 | |
# bn_mul_add_words_internal: worker reached from bn_mul_add_words with
# $6 > 0 and $2 = 0.
# For each of the $6 32-bit words i:
#   {carry, word i at $4} = word i at $4 + (word i at $5) * $7 + carry
# The running carry is kept in $2 and is the value left in $2 (and copied
# to $4) on return. $4, $5 and $6 are modified. Four words are handled per
# loop iteration; up to three left-over words are handled in the tail.
# $1 ($at) is used as scratch, which the file-level ".set noat" permits.
21 .ent bn_mul_add_words_internal | |
22 bn_mul_add_words_internal: | |
23 .set reorder | |
# $3 = -4 is a mask: for positive $6, ($6 & $3) is non-zero only while at
# least four words remain.
24 li $3,-4 | |
25 and $8,$6,$3 | |
26 lw $12,0($5) | |
27 beqz $8,.L_bn_mul_add_words_tail | |
28 | |
# Unrolled loop, four words per pass. Each multu is issued early and its
# HI/LO result is collected a few instructions later; loads for the later
# words are interleaved with the arithmetic of the earlier ones.
29 .L_bn_mul_add_words_loop: | |
30 multu $12,$7 | |
31 lw $13,0($4) | |
32 lw $14,4($5) | |
33 lw $15,4($4) | |
34 lw $8,2*4($5) | |
35 lw $9,2*4($4) | |
# Word 0: add the incoming carry, then the low product word; the new carry
# is the high product word plus the two carry-outs (each detected by sltu).
36 addu $13,$2 | |
37 sltu $2,$13,$2 # All manuals say it "compares 32-bit | |
38 # values", but it seems to work fine | |
39 # even on 64-bit registers. | |
40 mflo $1 | |
41 mfhi $12 | |
42 addu $13,$1 | |
43 addu $2,$12 | |
44 multu $14,$7 | |
45 sltu $1,$13,$1 | |
46 sw $13,0($4) | |
47 addu $2,$1 | |
48 | |
# Word 1 (same pattern, operands in $15/$14).
49 lw $10,3*4($5) | |
50 lw $11,3*4($4) | |
51 addu $15,$2 | |
52 sltu $2,$15,$2 | |
53 mflo $1 | |
54 mfhi $14 | |
55 addu $15,$1 | |
56 addu $2,$14 | |
57 multu $8,$7 | |
58 sltu $1,$15,$1 | |
59 sw $15,4($4) | |
60 addu $2,$1 | |
61 | |
# Word 2. The count and both pointers are advanced here, so the remaining
# stores of this pass use negative offsets from the updated $4.
62 subu $6,4 | |
63 addu $4,4*4 | |
64 addu $5,4*4 | |
65 addu $9,$2 | |
66 sltu $2,$9,$2 | |
67 mflo $1 | |
68 mfhi $8 | |
69 addu $9,$1 | |
70 addu $2,$8 | |
71 multu $10,$7 | |
72 sltu $1,$9,$1 | |
73 sw $9,-2*4($4) | |
74 addu $2,$1 | |
75 | |
76 | |
# Word 3. $8 is recomputed as ($6 & -4) for the loop test below.
77 and $8,$6,$3 | |
78 addu $11,$2 | |
79 sltu $2,$11,$2 | |
80 mflo $1 | |
81 mfhi $10 | |
82 addu $11,$1 | |
83 addu $2,$10 | |
84 sltu $1,$11,$1 | |
85 sw $11,-4($4) | |
86 addu $2,$1 | |
87 .set noreorder | |
# Branch-likely: the delay-slot load of the next source word is executed
# only when the branch back to the loop is taken.
88 bgtzl $8,.L_bn_mul_add_words_loop | |
89 lw $12,0($5) | |
90 | |
# Nothing left after the unrolled loop: return.
91 beqz $6,.L_bn_mul_add_words_return | |
92 nop | |
93 | |
# Tail: one to three remaining words, leaving as soon as $6 reaches zero.
94 .L_bn_mul_add_words_tail: | |
95 .set reorder | |
96 lw $12,0($5) | |
97 multu $12,$7 | |
98 lw $13,0($4) | |
99 subu $6,1 | |
100 addu $13,$2 | |
101 sltu $2,$13,$2 | |
102 mflo $1 | |
103 mfhi $12 | |
104 addu $13,$1 | |
105 addu $2,$12 | |
106 sltu $1,$13,$1 | |
107 sw $13,0($4) | |
108 addu $2,$1 | |
109 beqz $6,.L_bn_mul_add_words_return | |
110 | |
111 lw $12,4($5) | |
112 multu $12,$7 | |
113 lw $13,4($4) | |
114 subu $6,1 | |
115 addu $13,$2 | |
116 sltu $2,$13,$2 | |
117 mflo $1 | |
118 mfhi $12 | |
119 addu $13,$1 | |
120 addu $2,$12 | |
121 sltu $1,$13,$1 | |
122 sw $13,4($4) | |
123 addu $2,$1 | |
124 beqz $6,.L_bn_mul_add_words_return | |
125 | |
# Third tail word: no count update is needed because it is the last one.
126 lw $12,2*4($5) | |
127 multu $12,$7 | |
128 lw $13,2*4($4) | |
129 addu $13,$2 | |
130 sltu $2,$13,$2 | |
131 mflo $1 | |
132 mfhi $12 | |
133 addu $13,$1 | |
134 addu $2,$12 | |
135 sltu $1,$13,$1 | |
136 sw $13,2*4($4) | |
137 addu $2,$1 | |
138 | |
139 .L_bn_mul_add_words_return: | |
140 .set noreorder | |
141 jr $31 | |
# Return delay slot: final carry in $2 is also copied to $4.
142 move $4,$2 | |
143 .end bn_mul_add_words_internal | |
144 | |
145 .align 5 | |
# bn_mul_words: exported entry point.
# If the word count in $6 is greater than zero, control transfers to
# bn_mul_words_internal; otherwise it returns at once with $2 = 0
# (and $4 = 0).
146 .globl bn_mul_words | |
147 .ent bn_mul_words | |
148 bn_mul_words: | |
149 .set noreorder | |
150 bgtz $6,bn_mul_words_internal | |
# Branch delay slot: runs on both paths, so the worker starts with carry $2 = 0.
151 move $2,$0 | |
152 jr $31 | |
# Return delay slot: mirrors the result in $2 into $4.
153 move $4,$2 | |
154 .end bn_mul_words | |
155 | |
156 .align 5 | |
# bn_mul_words_internal: worker reached from bn_mul_words with $6 > 0 and
# $2 = 0.
# For each of the $6 32-bit words i:
#   {carry, word i at $4} = (word i at $5) * $7 + carry
# Unlike bn_mul_add_words_internal, the destination is only written, never
# read. The running carry is kept in $2 and is left in $2 (and copied to
# $4) on return. $4, $5 and $6 are modified. Four words are handled per
# loop iteration; up to three left-over words are handled in the tail.
157 .ent bn_mul_words_internal | |
158 bn_mul_words_internal: | |
159 .set reorder | |
# $3 = -4 is a mask: for positive $6, ($6 & $3) is non-zero only while at
# least four words remain.
160 li $3,-4 | |
161 and $8,$6,$3 | |
162 lw $12,0($5) | |
163 beqz $8,.L_bn_mul_words_tail | |
164 | |
165 .L_bn_mul_words_loop: | |
166 multu $12,$7 | |
167 lw $14,4($5) | |
168 lw $8,2*4($5) | |
169 lw $10,3*4($5) | |
# Word 0: result = carry + low product word; the next carry is the high
# product word plus the carry-out detected by sltu.
170 mflo $1 | |
171 mfhi $12 | |
172 addu $2,$1 | |
173 sltu $13,$2,$1 | |
174 multu $14,$7 | |
175 sw $2,0($4) | |
176 addu $2,$13,$12 | |
177 | |
# Word 1. The count and both pointers are advanced here, so the remaining
# stores of this pass use negative offsets from the updated $4.
178 subu $6,4 | |
179 addu $4,4*4 | |
180 addu $5,4*4 | |
181 mflo $1 | |
182 mfhi $14 | |
183 addu $2,$1 | |
184 sltu $15,$2,$1 | |
185 multu $8,$7 | |
186 sw $2,-3*4($4) | |
187 addu $2,$15,$14 | |
188 | |
# Word 2.
189 mflo $1 | |
190 mfhi $8 | |
191 addu $2,$1 | |
192 sltu $9,$2,$1 | |
193 multu $10,$7 | |
194 sw $2,-2*4($4) | |
195 addu $2,$9,$8 | |
196 | |
# Word 3. $8 is recomputed as ($6 & -4) for the loop test below.
197 and $8,$6,$3 | |
198 mflo $1 | |
199 mfhi $10 | |
200 addu $2,$1 | |
201 sltu $11,$2,$1 | |
202 sw $2,-4($4) | |
203 addu $2,$11,$10 | |
204 .set noreorder | |
# Branch-likely: the delay-slot load of the next source word is executed
# only when the branch back to the loop is taken.
205 bgtzl $8,.L_bn_mul_words_loop | |
206 lw $12,0($5) | |
207 | |
# Nothing left after the unrolled loop: return.
208 beqz $6,.L_bn_mul_words_return | |
209 nop | |
210 | |
# Tail: one to three remaining words, leaving as soon as $6 reaches zero.
211 .L_bn_mul_words_tail: | |
212 .set reorder | |
213 lw $12,0($5) | |
214 multu $12,$7 | |
215 subu $6,1 | |
216 mflo $1 | |
217 mfhi $12 | |
218 addu $2,$1 | |
219 sltu $13,$2,$1 | |
220 sw $2,0($4) | |
221 addu $2,$13,$12 | |
222 beqz $6,.L_bn_mul_words_return | |
223 | |
224 lw $12,4($5) | |
225 multu $12,$7 | |
226 subu $6,1 | |
227 mflo $1 | |
228 mfhi $12 | |
229 addu $2,$1 | |
230 sltu $13,$2,$1 | |
231 sw $2,4($4) | |
232 addu $2,$13,$12 | |
233 beqz $6,.L_bn_mul_words_return | |
234 | |
# Third tail word: no count update is needed because it is the last one.
235 lw $12,2*4($5) | |
236 multu $12,$7 | |
237 mflo $1 | |
238 mfhi $12 | |
239 addu $2,$1 | |
240 sltu $13,$2,$1 | |
241 sw $2,2*4($4) | |
242 addu $2,$13,$12 | |
243 | |
244 .L_bn_mul_words_return: | |
245 .set noreorder | |
246 jr $31 | |
# Return delay slot: final carry in $2 is also copied to $4.
247 move $4,$2 | |
248 .end bn_mul_words_internal | |
249 | |
250 .align 5 | |
# bn_sqr_words: exported entry point.
# If the word count in $6 is greater than zero, control transfers to
# bn_sqr_words_internal; otherwise it returns at once with $2 = 0
# (and $4 = 0).
251 .globl bn_sqr_words | |
252 .ent bn_sqr_words | |
253 bn_sqr_words: | |
254 .set noreorder | |
255 bgtz $6,bn_sqr_words_internal | |
# Branch delay slot: runs on both paths; the worker never changes $2.
256 move $2,$0 | |
257 jr $31 | |
# Return delay slot: mirrors $2 into $4.
258 move $4,$2 | |
259 .end bn_sqr_words | |
260 | |
261 .align 5 | |
# bn_sqr_words_internal: worker reached from bn_sqr_words with $6 > 0.
# For each of the $6 32-bit words i at $5, the 64-bit square is stored at
# $4 as two words: low word at index 2*i, high word at index 2*i+1.
# There is no carry between words. $2 is not written here, so it still
# holds the value set by the entry stub (0) when it is copied to $4 on
# return. $4, $5 and $6 are modified. Four source words are handled per
# loop iteration; up to three left-over words are handled in the tail.
262 .ent bn_sqr_words_internal | |
263 bn_sqr_words_internal: | |
264 .set reorder | |
# $3 = -4 is a mask: for positive $6, ($6 & $3) is non-zero only while at
# least four words remain.
265 li $3,-4 | |
266 and $8,$6,$3 | |
267 lw $12,0($5) | |
268 beqz $8,.L_bn_sqr_words_tail | |
269 | |
270 .L_bn_sqr_words_loop: | |
271 multu $12,$12 | |
272 lw $14,4($5) | |
273 lw $8,2*4($5) | |
274 lw $10,3*4($5) | |
275 mflo $13 | |
276 mfhi $12 | |
277 sw $13,0($4) | |
278 sw $12,4($4) | |
279 | |
# The destination advances by eight words per four source words, so the
# remaining stores of this pass use negative offsets from the updated $4.
280 multu $14,$14 | |
281 subu $6,4 | |
282 addu $4,8*4 | |
283 addu $5,4*4 | |
284 mflo $15 | |
285 mfhi $14 | |
286 sw $15,-6*4($4) | |
287 sw $14,-5*4($4) | |
288 | |
289 multu $8,$8 | |
290 mflo $9 | |
291 mfhi $8 | |
292 sw $9,-4*4($4) | |
293 sw $8,-3*4($4) | |
294 | |
295 | |
# $8 is recomputed as ($6 & -4) for the loop test below.
296 multu $10,$10 | |
297 and $8,$6,$3 | |
298 mflo $11 | |
299 mfhi $10 | |
300 sw $11,-2*4($4) | |
301 sw $10,-4($4) | |
302 | |
303 .set noreorder | |
# Branch-likely: the delay-slot load of the next source word is executed
# only when the branch back to the loop is taken.
304 bgtzl $8,.L_bn_sqr_words_loop | |
305 lw $12,0($5) | |
306 | |
# Nothing left after the unrolled loop: return.
307 beqz $6,.L_bn_sqr_words_return | |
308 nop | |
309 | |
# Tail: one to three remaining words, leaving as soon as $6 reaches zero.
310 .L_bn_sqr_words_tail: | |
311 .set reorder | |
312 lw $12,0($5) | |
313 multu $12,$12 | |
314 subu $6,1 | |
315 mflo $13 | |
316 mfhi $12 | |
317 sw $13,0($4) | |
318 sw $12,4($4) | |
319 beqz $6,.L_bn_sqr_words_return | |
320 | |
321 lw $12,4($5) | |
322 multu $12,$12 | |
323 subu $6,1 | |
324 mflo $13 | |
325 mfhi $12 | |
326 sw $13,2*4($4) | |
327 sw $12,3*4($4) | |
328 beqz $6,.L_bn_sqr_words_return | |
329 | |
# Third tail word: no count update is needed because it is the last one.
330 lw $12,2*4($5) | |
331 multu $12,$12 | |
332 mflo $13 | |
333 mfhi $12 | |
334 sw $13,4*4($4) | |
335 sw $12,5*4($4) | |
336 | |
337 .L_bn_sqr_words_return: | |
338 .set noreorder | |
339 jr $31 | |
# Return delay slot: copies the unchanged $2 to $4.
340 move $4,$2 | |
341 | |
342 .end bn_sqr_words_internal | |
343 | |
344 .align 5 | |
# bn_add_words: exported entry point.
# Here the word count is in $7 (not $6 as in the multiply routines).
# If it is greater than zero, control transfers to bn_add_words_internal;
# otherwise it returns at once with $2 = 0 (and $4 = 0).
345 .globl bn_add_words | |
346 .ent bn_add_words | |
347 bn_add_words: | |
348 .set noreorder | |
349 bgtz $7,bn_add_words_internal | |
# Branch delay slot: runs on both paths, so the worker starts with carry $2 = 0.
350 move $2,$0 | |
351 jr $31 | |
# Return delay slot: mirrors the result in $2 into $4.
352 move $4,$2 | |
353 .end bn_add_words | |
354 | |
355 .align 5 | |
# bn_add_words_internal: worker reached from bn_add_words with $7 > 0 and
# $2 = 0.
# For each of the $7 32-bit words i:
#   {carry, word i at $4} = word i at $5 + word i at $6 + carry
# The carry (0 or 1) is kept in $2 and is left in $2 (and copied to $4) on
# return. $4, $5, $6 and $7 are modified. Four words are handled per loop
# iteration; up to three left-over words are handled in the tail.
356 .ent bn_add_words_internal | |
357 bn_add_words_internal: | |
358 .set reorder | |
# $3 = -4 is a mask: for positive $7, ($7 & $3) is non-zero only while at
# least four words remain.
359 li $3,-4 | |
360 and $1,$7,$3 | |
361 lw $12,0($5) | |
362 beqz $1,.L_bn_add_words_tail | |
363 | |
# Loop head: the loads of all eight operands are interleaved with the
# count/pointer updates; $1 already holds the loop condition for the
# branch at the bottom. Operands from $6 and the stores to $4 use negative
# offsets once those pointers have been advanced.
364 .L_bn_add_words_loop: | |
365 lw $8,0($6) | |
366 subu $7,4 | |
367 lw $13,4($5) | |
368 and $1,$7,$3 | |
369 lw $14,2*4($5) | |
370 addu $6,4*4 | |
371 lw $15,3*4($5) | |
372 addu $4,4*4 | |
373 lw $9,-3*4($6) | |
374 addu $5,4*4 | |
375 lw $10,-2*4($6) | |
376 lw $11,-4($6) | |
# Word 0: two-step add. The first sltu catches the carry out of a+b, the
# second the carry out of adding the incoming carry; their sum is the new
# carry.
377 addu $8,$12 | |
378 sltu $24,$8,$12 | |
379 addu $12,$8,$2 | |
380 sltu $2,$12,$8 | |
381 sw $12,-4*4($4) | |
382 addu $2,$24 | |
383 | |
# Word 1.
384 addu $9,$13 | |
385 sltu $25,$9,$13 | |
386 addu $13,$9,$2 | |
387 sltu $2,$13,$9 | |
388 sw $13,-3*4($4) | |
389 addu $2,$25 | |
390 | |
# Word 2.
391 addu $10,$14 | |
392 sltu $24,$10,$14 | |
393 addu $14,$10,$2 | |
394 sltu $2,$14,$10 | |
395 sw $14,-2*4($4) | |
396 addu $2,$24 | |
397 | |
# Word 3.
398 addu $11,$15 | |
399 sltu $25,$11,$15 | |
400 addu $15,$11,$2 | |
401 sltu $2,$15,$11 | |
402 sw $15,-4($4) | |
403 addu $2,$25 | |
404 | |
405 .set noreorder | |
# Branch-likely: the delay-slot load of the next word from $5 is executed
# only when the branch back to the loop is taken.
406 bgtzl $1,.L_bn_add_words_loop | |
407 lw $12,0($5) | |
408 | |
# Nothing left after the unrolled loop: return.
409 beqz $7,.L_bn_add_words_return | |
410 nop | |
411 | |
# Tail: one to three remaining words, leaving as soon as $7 reaches zero.
412 .L_bn_add_words_tail: | |
413 .set reorder | |
414 lw $12,0($5) | |
415 lw $8,0($6) | |
416 addu $8,$12 | |
417 subu $7,1 | |
418 sltu $24,$8,$12 | |
419 addu $12,$8,$2 | |
420 sltu $2,$12,$8 | |
421 sw $12,0($4) | |
422 addu $2,$24 | |
423 beqz $7,.L_bn_add_words_return | |
424 | |
425 lw $13,4($5) | |
426 lw $9,4($6) | |
427 addu $9,$13 | |
428 subu $7,1 | |
429 sltu $25,$9,$13 | |
430 addu $13,$9,$2 | |
431 sltu $2,$13,$9 | |
432 sw $13,4($4) | |
433 addu $2,$25 | |
434 beqz $7,.L_bn_add_words_return | |
435 | |
# Third tail word: no count update is needed because it is the last one.
436 lw $14,2*4($5) | |
437 lw $10,2*4($6) | |
438 addu $10,$14 | |
439 sltu $24,$10,$14 | |
440 addu $14,$10,$2 | |
441 sltu $2,$14,$10 | |
442 sw $14,2*4($4) | |
443 addu $2,$24 | |
444 | |
445 .L_bn_add_words_return: | |
446 .set noreorder | |
447 jr $31 | |
# Return delay slot: final carry in $2 is also copied to $4.
448 move $4,$2 | |
449 | |
450 .end bn_add_words_internal | |
451 | |
452 .align 5 | |
# bn_sub_words: exported entry point.
# The word count is in $7. If it is greater than zero, control transfers
# to bn_sub_words_internal; otherwise it returns at once with $2 = 0
# (and $4 = 0).
453 .globl bn_sub_words | |
454 .ent bn_sub_words | |
455 bn_sub_words: | |
456 .set noreorder | |
457 bgtz $7,bn_sub_words_internal | |
# Branch delay slot: runs on both paths, so the worker starts with borrow $2 = 0.
458 move $2,$0 | |
459 jr $31 | |
# Return delay slot: clears $4 directly; the sibling stubs copy $2 instead,
# which is also zero at this point.
460 move $4,$0 | |
461 .end bn_sub_words | |
462 | |
463 .align 5 | |
# bn_sub_words_internal: worker reached from bn_sub_words with $7 > 0 and
# $2 = 0.
# For each of the $7 32-bit words i:
#   word i at $4 = word i at $5 - word i at $6 - borrow
# The borrow (0 or 1) is kept in $2 and is left in $2 (and copied to $4)
# on return. $4, $5, $6 and $7 are modified. Four words are handled per
# loop iteration; up to three left-over words are handled in the tail.
464 .ent bn_sub_words_internal | |
465 bn_sub_words_internal: | |
466 .set reorder | |
# $3 = -4 is a mask: for positive $7, ($7 & $3) is non-zero only while at
# least four words remain.
467 li $3,-4 | |
468 and $1,$7,$3 | |
469 lw $12,0($5) | |
470 beqz $1,.L_bn_sub_words_tail | |
471 | |
# Loop head: the loads of all eight operands are interleaved with the
# count/pointer updates; $1 already holds the loop condition for the
# branch at the bottom.
472 .L_bn_sub_words_loop: | |
473 lw $8,0($6) | |
474 subu $7,4 | |
475 lw $13,4($5) | |
476 and $1,$7,$3 | |
477 lw $14,2*4($5) | |
478 addu $6,4*4 | |
479 lw $15,3*4($5) | |
480 addu $4,4*4 | |
481 lw $9,-3*4($6) | |
482 addu $5,4*4 | |
483 lw $10,-2*4($6) | |
484 lw $11,-4($6) | |
# Word 0: two-step subtract. sltu records the borrow of a-b (a < b); sgtu
# records the borrow caused by subtracting the incoming borrow (result
# greater than the intermediate difference); their sum is the new borrow.
485 sltu $24,$12,$8 | |
486 subu $8,$12,$8 | |
487 subu $12,$8,$2 | |
488 sgtu $2,$12,$8 | |
489 sw $12,-4*4($4) | |
490 addu $2,$24 | |
491 | |
# Word 1.
492 sltu $25,$13,$9 | |
493 subu $9,$13,$9 | |
494 subu $13,$9,$2 | |
495 sgtu $2,$13,$9 | |
496 sw $13,-3*4($4) | |
497 addu $2,$25 | |
498 | |
499 | |
# Word 2.
500 sltu $24,$14,$10 | |
501 subu $10,$14,$10 | |
502 subu $14,$10,$2 | |
503 sgtu $2,$14,$10 | |
504 sw $14,-2*4($4) | |
505 addu $2,$24 | |
506 | |
# Word 3.
507 sltu $25,$15,$11 | |
508 subu $11,$15,$11 | |
509 subu $15,$11,$2 | |
510 sgtu $2,$15,$11 | |
511 sw $15,-4($4) | |
512 addu $2,$25 | |
513 | |
514 .set noreorder | |
# Branch-likely: the delay-slot load of the next word from $5 is executed
# only when the branch back to the loop is taken.
515 bgtzl $1,.L_bn_sub_words_loop | |
516 lw $12,0($5) | |
517 | |
# Nothing left after the unrolled loop: return.
518 beqz $7,.L_bn_sub_words_return | |
519 nop | |
520 | |
# Tail: one to three remaining words, leaving as soon as $7 reaches zero.
521 .L_bn_sub_words_tail: | |
522 .set reorder | |
523 lw $12,0($5) | |
524 lw $8,0($6) | |
525 subu $7,1 | |
526 sltu $24,$12,$8 | |
527 subu $8,$12,$8 | |
528 subu $12,$8,$2 | |
529 sgtu $2,$12,$8 | |
530 sw $12,0($4) | |
531 addu $2,$24 | |
532 beqz $7,.L_bn_sub_words_return | |
533 | |
534 lw $13,4($5) | |
535 subu $7,1 | |
536 lw $9,4($6) | |
537 sltu $25,$13,$9 | |
538 subu $9,$13,$9 | |
539 subu $13,$9,$2 | |
540 sgtu $2,$13,$9 | |
541 sw $13,4($4) | |
542 addu $2,$25 | |
543 beqz $7,.L_bn_sub_words_return | |
544 | |
# Third tail word: no count update is needed because it is the last one.
545 lw $14,2*4($5) | |
546 lw $10,2*4($6) | |
547 sltu $24,$14,$10 | |
548 subu $10,$14,$10 | |
549 subu $14,$10,$2 | |
550 sgtu $2,$14,$10 | |
551 sw $14,2*4($4) | |
552 addu $2,$24 | |
553 | |
554 .L_bn_sub_words_return: | |
555 .set noreorder | |
556 jr $31 | |
# Return delay slot: final borrow in $2 is also copied to $4.
557 move $4,$2 | |
558 .end bn_sub_words_internal | |
559 | |
560 .align 5 | |
# bn_div_3_words: exported entry point.
# Inputs as used here: $4 = pointer to a word array (words at offsets 0,
# -4 and, in the worker, -8 are read), $5 and $6 = two word values.
# The pointer is parked in $7 and $5 in $10. The word at ($7) is loaded
# into $4 and the word at -4($7) into $5. If the word at ($7) equals $6
# the routine returns -1 (all ones) in $2 and $4; otherwise it continues
# in bn_div_3_words_internal.
561 .globl bn_div_3_words | |
562 .ent bn_div_3_words | |
563 bn_div_3_words: | |
564 .set noreorder | |
565 move $7,$4 # we know that bn_div_words does not | |
566 # touch $7, $10, $11 and preserves $6 | |
567 # so that we can save two arguments | |
568 # and return address in registers | |
569 # instead of stack:-) | |
570 | |
571 lw $4,($7) | |
572 move $10,$5 | |
573 bne $4,$6,bn_div_3_words_internal | |
# Branch delay slot: this load runs on both paths.
574 lw $5,-4($7) | |
575 li $2,-1 | |
576 jr $31 | |
# Return delay slot: mirrors the -1 result into $4.
577 move $4,$2 | |
578 .end bn_div_3_words | |
579 | |
580 .align 5 | |
# bn_div_3_words_internal: continuation of bn_div_3_words.
# On entry: $4 = word at ($7), $5 = word at -4($7), $6 = divisor word,
# $10 = the caller's original $5, $7 = the array pointer.
# It first calls bn_div_words_internal, which leaves a quotient estimate
# in $2 and the remainder in $5. The estimate is then decremented in a
# correction loop that compares the two-word product $10 * $2 against the
# remainder and the word at -8($7). The result is returned in $2 (and $4).
581 .ent bn_div_3_words_internal | |
582 bn_div_3_words_internal: | |
583 .set reorder | |
# The return address is kept in $11 across the nested call instead of on
# the stack.
584 move $11,$31 | |
585 bal bn_div_words_internal | |
586 move $31,$11 | |
# $13:$12 = $10 * $2 (high:low); $14 = word at -8($7).
587 multu $10,$2 | |
588 lw $14,-2*4($7) | |
589 move $8,$0 | |
590 mfhi $13 | |
591 mflo $12 | |
# $24 is the "stop" flag: set when the high product word is below the
# remainder in $5.
592 sltu $24,$13,$5 | |
593 .L_bn_div_3_words_inner_loop: | |
594 bnez $24,.L_bn_div_3_words_inner_loop_done | |
# $1 = ($14 >= low product word) AND (high product word == remainder).
# It is evaluated before the updates below and tested at the bottom.
595 sgeu $1,$14,$12 | |
596 seq $25,$13,$5 | |
597 and $1,$25 | |
# Prepare the next trial: remainder += $6 and the two-word product -= $10
# ($15 carries the borrow from the low word into the high word).
598 sltu $15,$12,$10 | |
599 addu $5,$6 | |
600 subu $13,$15 | |
601 subu $12,$10 | |
# Recompute the stop flag; ($5 < $6) after the addition means the
# remainder wrapped around, which also stops the loop.
602 sltu $24,$13,$5 | |
603 sltu $8,$5,$6 | |
604 or $24,$8 | |
605 .set noreorder | |
# Branch-likely: the quotient is decremented in the delay slot only when
# the branch back to the loop is taken ($1 == 0).
606 beqzl $1,.L_bn_div_3_words_inner_loop | |
607 subu $2,1 | |
608 .set reorder | |
609 .L_bn_div_3_words_inner_loop_done: | |
610 .set noreorder | |
611 jr $31 | |
# Return delay slot: result in $2 is also copied to $4.
612 move $4,$2 | |
613 .end bn_div_3_words_internal | |
614 | |
615 .align 5 | |
# bn_div_words: exported entry point.
# If the divisor in $6 is non-zero, control transfers to
# bn_div_words_internal; for a zero divisor it returns -1 (all ones) in $2
# and $4 without dividing.
616 .globl bn_div_words | |
617 .ent bn_div_words | |
618 bn_div_words: | |
619 .set noreorder | |
620 bnez $6,bn_div_words_internal | |
# Branch delay slot: runs on both paths; the worker overwrites $2 later.
621 li $2,-1 # I would rather signal div-by-zero | |
622 # which can be done with 'break 7' | |
623 jr $31 | |
# Return delay slot: mirrors the -1 result into $4.
624 move $4,$2 | |
625 .end bn_div_words | |
626 | |
627 .align 5 | |
# bn_div_words_internal: divides the two-word value $4:$5 (high:low) by
# the non-zero word $6.
# Returns the quotient in $2 (also copied to $4) and the remainder in $3
# (also copied to $5). $6 is shifted during the computation and shifted
# back before returning. $7, $10 and $11 are never written, which
# bn_div_3_words relies on.
# Method as visible in the code: normalize the divisor so its top bit is
# set, then produce the quotient as two 16-bit halves, each obtained from
# a divu estimate that is corrected downwards in an inner loop.
628 .ent bn_div_words_internal | |
629 bn_div_words_internal: | |
# No .set directive precedes these instructions, so the noreorder mode
# selected in bn_div_words above presumably still applies here; the
# instruction after each branch is then its delay slot.
630 move $3,$0 | |
# Divisor already has its top bit set: no normalization needed ($25 = 0).
631 bltz $6,.L_bn_div_words_body | |
632 move $25,$3 | |
# Shift the divisor left one bit at a time until its top bit is set;
# ".-4" branches back to the sll. The delay-slot addu runs on every pass,
# so $25 ends up as the number of shifts performed.
633 sll $6,1 | |
634 bgtz $6,.-4 | |
635 addu $25,1 | |
636 | |
637 .set reorder | |
# $13 = -$25; the register-amount shifts below use it as a shift count.
# NOTE(review): this relies on only the low 5 bits of the count being
# used, giving 32-$25 - confirm against the ISA manual.
# $14 = the top $25 bits of $4 (bits the normalizing shift would discard),
# $1 = the top $25 bits of $5 (bits that move up into $4).
638 negu $13,$25 | |
639 li $14,-1 | |
640 sll $14,$13 | |
641 and $14,$4 | |
642 srl $1,$5,$13 | |
643 .set noreorder | |
# Branch-likely to the instruction after the delay slot: "break 6" is
# executed only when $14 != 0, i.e. when significant bits would be lost.
644 bnezl $14,.+8 | |
645 break 6 # signal overflow | |
646 .set reorder | |
# Apply the same shift to the dividend $4:$5.
647 sll $4,$25 | |
648 sll $5,$25 | |
649 or $4,$1 | |
650 .L_bn_div_words_body: | |
# $3 = high 16 bits of the normalized divisor.
651 srl $3,$6,4*4 # bits | |
652 sgeu $1,$4,$6 | |
653 .set noreorder | |
# Branch-likely: the delay-slot subtraction runs only when $4 >= $6, which
# brings the high dividend word below the divisor.
654 bnezl $1,.+8 | |
655 subu $4,$6 | |
656 .set reorder | |
657 | |
# First (upper) quotient half. The estimate defaults to 0xffff and is
# replaced by $4 / $3 unless the high halves of dividend and divisor match.
658 li $8,-1 | |
659 srl $9,$4,4*4 # bits | |
660 srl $8,4*4 # q=0xffff | |
661 beq $3,$9,.L_bn_div_words_skip_div1 | |
662 divu $0,$4,$3 | |
663 mflo $8 | |
664 .L_bn_div_words_skip_div1: | |
# $13:$12 = divisor * estimate; $15 = the next 32 bits of the dividend
# (low half of $4 followed by high half of $5).
665 multu $6,$8 | |
666 sll $15,$4,4*4 # bits | |
667 srl $1,$5,4*4 # bits | |
668 or $15,$1 | |
669 mflo $12 | |
670 mfhi $13 | |
# Correction loop: while the product $13:$12 exceeds $9:$15, subtract the
# divisor from the product and decrement the estimate.
671 .L_bn_div_words_inner_loop1: | |
672 sltu $14,$15,$12 | |
673 seq $24,$9,$13 | |
674 sltu $1,$9,$13 | |
675 and $14,$24 | |
676 sltu $2,$12,$6 | |
677 or $1,$14 | |
678 .set noreorder | |
679 beqz $1,.L_bn_div_words_inner_loop1_done | |
# Delay slot: the borrow into the high product word is applied on both
# paths; on exit $13 is no longer needed.
680 subu $13,$2 | |
681 subu $12,$6 | |
682 b .L_bn_div_words_inner_loop1 | |
683 subu $8,1 | |
684 .set reorder | |
685 .L_bn_div_words_inner_loop1_done: | |
686 | |
# Partial remainder into $4, first quotient half into the upper 16 bits
# of $2, and the low dividend word moved up for the second step.
687 sll $5,4*4 # bits | |
688 subu $4,$15,$12 | |
689 sll $2,$8,4*4 # bits | |
690 | |
# Second (lower) quotient half: same estimate-and-correct sequence.
691 li $8,-1 | |
692 srl $9,$4,4*4 # bits | |
693 srl $8,4*4 # q=0xffff | |
694 beq $3,$9,.L_bn_div_words_skip_div2 | |
695 divu $0,$4,$3 | |
696 mflo $8 | |
697 .L_bn_div_words_skip_div2: | |
698 multu $6,$8 | |
699 sll $15,$4,4*4 # bits | |
700 srl $1,$5,4*4 # bits | |
701 or $15,$1 | |
702 mflo $12 | |
703 mfhi $13 | |
# Same correction loop; $3 is reused as the borrow scratch here because
# the divisor's high half is not needed after this point.
704 .L_bn_div_words_inner_loop2: | |
705 sltu $14,$15,$12 | |
706 seq $24,$9,$13 | |
707 sltu $1,$9,$13 | |
708 and $14,$24 | |
709 sltu $3,$12,$6 | |
710 or $1,$14 | |
711 .set noreorder | |
712 beqz $1,.L_bn_div_words_inner_loop2_done | |
713 subu $13,$3 | |
714 subu $12,$6 | |
715 b .L_bn_div_words_inner_loop2 | |
716 subu $8,1 | |
717 .set reorder | |
718 .L_bn_div_words_inner_loop2_done: | |
719 | |
# Combine both quotient halves in $2 and undo the normalization shift on
# the remainder and on the divisor.
720 subu $4,$15,$12 | |
721 or $2,$8 | |
722 srl $3,$4,$25 # $3 contains remainder if anybody wants it | |
723 srl $6,$25 # restore $6 | |
724 | |
725 .set noreorder | |
# The remainder is also handed back in $5; bn_div_3_words_internal reads
# it from there.
726 move $5,$3 | |
727 jr $31 | |
# Return delay slot: quotient in $2 is also copied to $4.
728 move $4,$2 | |
729 .end bn_div_words_internal | |
730 | |
731 .align 5 | |
# bn_mul_comba8: fully unrolled 8-word by 8-word multiplication.
#   $4 = destination r, 16 words written (r[0] .. r[15])
#   $5 = source a, 8 words read
#   $6 = source b, 8 words read
# The product is built column by column: all a[i]*b[j] with the same i+j
# are summed into a three-word accumulator before the lowest word is
# stored as r[i+j]. Names used in the existing comments map to registers:
#   c1 = $2, c2 = $3, c3 = $7 (their roles rotate from column to column)
#   a[0..3] = $12..$15, a[4] = $16, a[5] = $18, a[6] = $20, a[7] = $5
#   b[0..3] = $8..$11,  b[4] = $17, b[5] = $19, b[6] = $21, b[7] = $6
#   $24/$25 = low/high word of the current product, $1 = carry scratch
# a[7] and b[7] overwrite the pointer registers $5 and $6 once every other
# word has been loaded. $16-$21 are saved in a 24-byte stack frame and
# restored before returning.
# Each "mul_add_c(...)" comment sits on the multu that STARTS that product;
# the additions that follow the next mflo/mfhi pair accumulate it.
732 .globl bn_mul_comba8 | |
733 .ent bn_mul_comba8 | |
734 bn_mul_comba8: | |
735 .set noreorder | |
736 .frame $29,6*4,$31 | |
737 .mask 0x003f0000,-4 | |
# Prologue: allocate the frame and save $16-$21.
738 subu $29,6*4 | |
739 sw $21,5*4($29) | |
740 sw $20,4*4($29) | |
741 sw $19,3*4($29) | |
742 sw $18,2*4($29) | |
743 sw $17,1*4($29) | |
744 sw $16,0*4($29) | |
745 | |
746 .set reorder | |
747 lw $12,0($5) # If compiled with -mips3 option on | |
748 # R5000 box assembler barks on this | |
749 # line with "should not have mult/div | |
750 # as last instruction in bb (R10K | |
751 # bug)" warning. If anybody out there | |
752 # has a clue about how to circumvent | |
753 # this do send me a note. | |
754 # <appro@fy.chalmers.se> | |
755 | |
756 lw $8,0($6) | |
757 lw $13,4($5) | |
758 lw $14,2*4($5) | |
759 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); | |
760 lw $15,3*4($5) | |
761 lw $9,4($6) | |
762 lw $10,2*4($6) | |
763 lw $11,3*4($6) | |
# First product seeds the accumulator directly: c1 = low, c2 = high.
764 mflo $2 | |
765 mfhi $3 | |
766 | |
767 lw $16,4*4($5) | |
768 lw $18,5*4($5) | |
769 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); | |
770 lw $20,6*4($5) | |
# From here on $5 holds a[7], no longer the pointer to a.
771 lw $5,7*4($5) | |
772 lw $17,4*4($6) | |
773 lw $19,5*4($6) | |
774 mflo $24 | |
775 mfhi $25 | |
776 addu $3,$24 | |
777 sltu $1,$3,$24 | |
778 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); | |
779 addu $7,$25,$1 | |
780 lw $21,6*4($6) | |
# From here on $6 holds b[7], no longer the pointer to b.
781 lw $6,7*4($6) | |
782 sw $2,0($4) # r[0]=c1; | |
# General accumulation pattern: add the low product word to the lowest
# accumulator word, fold that carry into the high product word, add it to
# the middle word, and record the carry out of that in the top word.
783 mflo $24 | |
784 mfhi $25 | |
785 addu $3,$24 | |
786 sltu $1,$3,$24 | |
787 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); | |
788 addu $25,$1 | |
789 addu $7,$25 | |
790 sltu $2,$7,$25 | |
791 sw $3,4($4) # r[1]=c2; | |
792 | |
793 mflo $24 | |
794 mfhi $25 | |
795 addu $7,$24 | |
796 sltu $1,$7,$24 | |
797 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); | |
798 addu $25,$1 | |
799 addu $2,$25 | |
800 mflo $24 | |
801 mfhi $25 | |
802 addu $7,$24 | |
803 sltu $1,$7,$24 | |
804 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); | |
805 addu $25,$1 | |
806 addu $2,$25 | |
807 sltu $3,$2,$25 | |
808 mflo $24 | |
809 mfhi $25 | |
810 addu $7,$24 | |
811 sltu $1,$7,$24 | |
812 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); | |
813 addu $25,$1 | |
814 addu $2,$25 | |
815 sltu $1,$2,$25 | |
816 addu $3,$1 | |
817 sw $7,2*4($4) # r[2]=c3; | |
818 | |
819 mflo $24 | |
820 mfhi $25 | |
821 addu $2,$24 | |
822 sltu $1,$2,$24 | |
823 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); | |
824 addu $25,$1 | |
825 addu $3,$25 | |
826 sltu $7,$3,$25 | |
827 mflo $24 | |
828 mfhi $25 | |
829 addu $2,$24 | |
830 sltu $1,$2,$24 | |
831 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); | |
832 addu $25,$1 | |
833 addu $3,$25 | |
834 sltu $1,$3,$25 | |
835 addu $7,$1 | |
836 mflo $24 | |
837 mfhi $25 | |
838 addu $2,$24 | |
839 sltu $1,$2,$24 | |
840 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); | |
841 addu $25,$1 | |
842 addu $3,$25 | |
843 sltu $1,$3,$25 | |
844 addu $7,$1 | |
845 mflo $24 | |
846 mfhi $25 | |
847 addu $2,$24 | |
848 sltu $1,$2,$24 | |
849 multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); | |
850 addu $25,$1 | |
851 addu $3,$25 | |
852 sltu $1,$3,$25 | |
853 addu $7,$1 | |
854 sw $2,3*4($4) # r[3]=c1; | |
855 | |
856 mflo $24 | |
857 mfhi $25 | |
858 addu $3,$24 | |
859 sltu $1,$3,$24 | |
860 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); | |
861 addu $25,$1 | |
862 addu $7,$25 | |
863 sltu $2,$7,$25 | |
864 mflo $24 | |
865 mfhi $25 | |
866 addu $3,$24 | |
867 sltu $1,$3,$24 | |
868 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); | |
869 addu $25,$1 | |
870 addu $7,$25 | |
871 sltu $1,$7,$25 | |
872 addu $2,$1 | |
873 mflo $24 | |
874 mfhi $25 | |
875 addu $3,$24 | |
876 sltu $1,$3,$24 | |
877 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); | |
878 addu $25,$1 | |
879 addu $7,$25 | |
880 sltu $1,$7,$25 | |
881 addu $2,$1 | |
882 mflo $24 | |
883 mfhi $25 | |
884 addu $3,$24 | |
885 sltu $1,$3,$24 | |
886 multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); | |
887 addu $25,$1 | |
888 addu $7,$25 | |
889 sltu $1,$7,$25 | |
890 addu $2,$1 | |
891 mflo $24 | |
892 mfhi $25 | |
893 addu $3,$24 | |
894 sltu $1,$3,$24 | |
895 multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); | |
896 addu $25,$1 | |
897 addu $7,$25 | |
898 sltu $1,$7,$25 | |
899 addu $2,$1 | |
900 sw $3,4*4($4) # r[4]=c2; | |
901 | |
902 mflo $24 | |
903 mfhi $25 | |
904 addu $7,$24 | |
905 sltu $1,$7,$24 | |
906 multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); | |
907 addu $25,$1 | |
908 addu $2,$25 | |
909 sltu $3,$2,$25 | |
910 mflo $24 | |
911 mfhi $25 | |
912 addu $7,$24 | |
913 sltu $1,$7,$24 | |
914 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); | |
915 addu $25,$1 | |
916 addu $2,$25 | |
917 sltu $1,$2,$25 | |
918 addu $3,$1 | |
919 mflo $24 | |
920 mfhi $25 | |
921 addu $7,$24 | |
922 sltu $1,$7,$24 | |
923 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); | |
924 addu $25,$1 | |
925 addu $2,$25 | |
926 sltu $1,$2,$25 | |
927 addu $3,$1 | |
928 mflo $24 | |
929 mfhi $25 | |
930 addu $7,$24 | |
931 sltu $1,$7,$24 | |
932 multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); | |
933 addu $25,$1 | |
934 addu $2,$25 | |
935 sltu $1,$2,$25 | |
936 addu $3,$1 | |
937 mflo $24 | |
938 mfhi $25 | |
939 addu $7,$24 | |
940 sltu $1,$7,$24 | |
941 multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); | |
942 addu $25,$1 | |
943 addu $2,$25 | |
944 sltu $1,$2,$25 | |
945 addu $3,$1 | |
946 mflo $24 | |
947 mfhi $25 | |
948 addu $7,$24 | |
949 sltu $1,$7,$24 | |
950 multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); | |
951 addu $25,$1 | |
952 addu $2,$25 | |
953 sltu $1,$2,$25 | |
954 addu $3,$1 | |
955 sw $7,5*4($4) # r[5]=c3; | |
956 | |
957 mflo $24 | |
958 mfhi $25 | |
959 addu $2,$24 | |
960 sltu $1,$2,$24 | |
961 multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); | |
962 addu $25,$1 | |
963 addu $3,$25 | |
964 sltu $7,$3,$25 | |
965 mflo $24 | |
966 mfhi $25 | |
967 addu $2,$24 | |
968 sltu $1,$2,$24 | |
969 multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); | |
970 addu $25,$1 | |
971 addu $3,$25 | |
972 sltu $1,$3,$25 | |
973 addu $7,$1 | |
974 mflo $24 | |
975 mfhi $25 | |
976 addu $2,$24 | |
977 sltu $1,$2,$24 | |
978 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); | |
979 addu $25,$1 | |
980 addu $3,$25 | |
981 sltu $1,$3,$25 | |
982 addu $7,$1 | |
983 mflo $24 | |
984 mfhi $25 | |
985 addu $2,$24 | |
986 sltu $1,$2,$24 | |
987 multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); | |
988 addu $25,$1 | |
989 addu $3,$25 | |
990 sltu $1,$3,$25 | |
991 addu $7,$1 | |
992 mflo $24 | |
993 mfhi $25 | |
994 addu $2,$24 | |
995 sltu $1,$2,$24 | |
996 multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); | |
997 addu $25,$1 | |
998 addu $3,$25 | |
999 sltu $1,$3,$25 | |
1000 addu $7,$1 | |
1001 mflo $24 | |
1002 mfhi $25 | |
1003 addu $2,$24 | |
1004 sltu $1,$2,$24 | |
1005 multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); | |
1006 addu $25,$1 | |
1007 addu $3,$25 | |
1008 sltu $1,$3,$25 | |
1009 addu $7,$1 | |
1010 mflo $24 | |
1011 mfhi $25 | |
1012 addu $2,$24 | |
1013 sltu $1,$2,$24 | |
1014 multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); | |
1015 addu $25,$1 | |
1016 addu $3,$25 | |
1017 sltu $1,$3,$25 | |
1018 addu $7,$1 | |
1019 sw $2,6*4($4) # r[6]=c1; | |
1020 | |
1021 mflo $24 | |
1022 mfhi $25 | |
1023 addu $3,$24 | |
1024 sltu $1,$3,$24 | |
1025 multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); | |
1026 addu $25,$1 | |
1027 addu $7,$25 | |
1028 sltu $2,$7,$25 | |
1029 mflo $24 | |
1030 mfhi $25 | |
1031 addu $3,$24 | |
1032 sltu $1,$3,$24 | |
1033 multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); | |
1034 addu $25,$1 | |
1035 addu $7,$25 | |
1036 sltu $1,$7,$25 | |
1037 addu $2,$1 | |
1038 mflo $24 | |
1039 mfhi $25 | |
1040 addu $3,$24 | |
1041 sltu $1,$3,$24 | |
1042 multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); | |
1043 addu $25,$1 | |
1044 addu $7,$25 | |
1045 sltu $1,$7,$25 | |
1046 addu $2,$1 | |
1047 mflo $24 | |
1048 mfhi $25 | |
1049 addu $3,$24 | |
1050 sltu $1,$3,$24 | |
1051 multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); | |
1052 addu $25,$1 | |
1053 addu $7,$25 | |
1054 sltu $1,$7,$25 | |
1055 addu $2,$1 | |
1056 mflo $24 | |
1057 mfhi $25 | |
1058 addu $3,$24 | |
1059 sltu $1,$3,$24 | |
1060 multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); | |
1061 addu $25,$1 | |
1062 addu $7,$25 | |
1063 sltu $1,$7,$25 | |
1064 addu $2,$1 | |
1065 mflo $24 | |
1066 mfhi $25 | |
1067 addu $3,$24 | |
1068 sltu $1,$3,$24 | |
1069 multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); | |
1070 addu $25,$1 | |
1071 addu $7,$25 | |
1072 sltu $1,$7,$25 | |
1073 addu $2,$1 | |
1074 mflo $24 | |
1075 mfhi $25 | |
1076 addu $3,$24 | |
1077 sltu $1,$3,$24 | |
1078 multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); | |
1079 addu $25,$1 | |
1080 addu $7,$25 | |
1081 sltu $1,$7,$25 | |
1082 addu $2,$1 | |
1083 mflo $24 | |
1084 mfhi $25 | |
1085 addu $3,$24 | |
1086 sltu $1,$3,$24 | |
1087 multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); | |
1088 addu $25,$1 | |
1089 addu $7,$25 | |
1090 sltu $1,$7,$25 | |
1091 addu $2,$1 | |
1092 sw $3,7*4($4) # r[7]=c2; | |
1093 | |
1094 mflo $24 | |
1095 mfhi $25 | |
1096 addu $7,$24 | |
1097 sltu $1,$7,$24 | |
1098 multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); | |
1099 addu $25,$1 | |
1100 addu $2,$25 | |
1101 sltu $3,$2,$25 | |
1102 mflo $24 | |
1103 mfhi $25 | |
1104 addu $7,$24 | |
1105 sltu $1,$7,$24 | |
1106 multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); | |
1107 addu $25,$1 | |
1108 addu $2,$25 | |
1109 sltu $1,$2,$25 | |
1110 addu $3,$1 | |
1111 mflo $24 | |
1112 mfhi $25 | |
1113 addu $7,$24 | |
1114 sltu $1,$7,$24 | |
1115 multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); | |
1116 addu $25,$1 | |
1117 addu $2,$25 | |
1118 sltu $1,$2,$25 | |
1119 addu $3,$1 | |
1120 mflo $24 | |
1121 mfhi $25 | |
1122 addu $7,$24 | |
1123 sltu $1,$7,$24 | |
1124 multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); | |
1125 addu $25,$1 | |
1126 addu $2,$25 | |
1127 sltu $1,$2,$25 | |
1128 addu $3,$1 | |
1129 mflo $24 | |
1130 mfhi $25 | |
1131 addu $7,$24 | |
1132 sltu $1,$7,$24 | |
1133 multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); | |
1134 addu $25,$1 | |
1135 addu $2,$25 | |
1136 sltu $1,$2,$25 | |
1137 addu $3,$1 | |
1138 mflo $24 | |
1139 mfhi $25 | |
1140 addu $7,$24 | |
1141 sltu $1,$7,$24 | |
1142 multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); | |
1143 addu $25,$1 | |
1144 addu $2,$25 | |
1145 sltu $1,$2,$25 | |
1146 addu $3,$1 | |
1147 mflo $24 | |
1148 mfhi $25 | |
1149 addu $7,$24 | |
1150 sltu $1,$7,$24 | |
1151 multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); | |
1152 addu $25,$1 | |
1153 addu $2,$25 | |
1154 sltu $1,$2,$25 | |
1155 addu $3,$1 | |
1156 sw $7,8*4($4) # r[8]=c3; | |
1157 | |
1158 mflo $24 | |
1159 mfhi $25 | |
1160 addu $2,$24 | |
1161 sltu $1,$2,$24 | |
1162 multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); | |
1163 addu $25,$1 | |
1164 addu $3,$25 | |
1165 sltu $7,$3,$25 | |
1166 mflo $24 | |
1167 mfhi $25 | |
1168 addu $2,$24 | |
1169 sltu $1,$2,$24 | |
1170 multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); | |
1171 addu $25,$1 | |
1172 addu $3,$25 | |
1173 sltu $1,$3,$25 | |
1174 addu $7,$1 | |
1175 mflo $24 | |
1176 mfhi $25 | |
1177 addu $2,$24 | |
1178 sltu $1,$2,$24 | |
1179 multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); | |
1180 addu $25,$1 | |
1181 addu $3,$25 | |
1182 sltu $1,$3,$25 | |
1183 addu $7,$1 | |
1184 mflo $24 | |
1185 mfhi $25 | |
1186 addu $2,$24 | |
1187 sltu $1,$2,$24 | |
1188 multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); | |
1189 addu $25,$1 | |
1190 addu $3,$25 | |
1191 sltu $1,$3,$25 | |
1192 addu $7,$1 | |
1193 mflo $24 | |
1194 mfhi $25 | |
1195 addu $2,$24 | |
1196 sltu $1,$2,$24 | |
1197 multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); | |
1198 addu $25,$1 | |
1199 addu $3,$25 | |
1200 sltu $1,$3,$25 | |
1201 addu $7,$1 | |
1202 mflo $24 | |
1203 mfhi $25 | |
1204 addu $2,$24 | |
1205 sltu $1,$2,$24 | |
1206 multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); | |
1207 addu $25,$1 | |
1208 addu $3,$25 | |
1209 sltu $1,$3,$25 | |
1210 addu $7,$1 | |
1211 sw $2,9*4($4) # r[9]=c1; | |
1212 | |
1213 mflo $24 | |
1214 mfhi $25 | |
1215 addu $3,$24 | |
1216 sltu $1,$3,$24 | |
1217 multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); | |
1218 addu $25,$1 | |
1219 addu $7,$25 | |
1220 sltu $2,$7,$25 | |
1221 mflo $24 | |
1222 mfhi $25 | |
1223 addu $3,$24 | |
1224 sltu $1,$3,$24 | |
1225 multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); | |
1226 addu $25,$1 | |
1227 addu $7,$25 | |
1228 sltu $1,$7,$25 | |
1229 addu $2,$1 | |
1230 mflo $24 | |
1231 mfhi $25 | |
1232 addu $3,$24 | |
1233 sltu $1,$3,$24 | |
1234 multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); | |
1235 addu $25,$1 | |
1236 addu $7,$25 | |
1237 sltu $1,$7,$25 | |
1238 addu $2,$1 | |
1239 mflo $24 | |
1240 mfhi $25 | |
1241 addu $3,$24 | |
1242 sltu $1,$3,$24 | |
1243 multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); | |
1244 addu $25,$1 | |
1245 addu $7,$25 | |
1246 sltu $1,$7,$25 | |
1247 addu $2,$1 | |
1248 mflo $24 | |
1249 mfhi $25 | |
1250 addu $3,$24 | |
1251 sltu $1,$3,$24 | |
1252 multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); | |
1253 addu $25,$1 | |
1254 addu $7,$25 | |
1255 sltu $1,$7,$25 | |
1256 addu $2,$1 | |
1257 sw $3,10*4($4) # r[10]=c2; | |
1258 | |
1259 mflo $24 | |
1260 mfhi $25 | |
1261 addu $7,$24 | |
1262 sltu $1,$7,$24 | |
1263 multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); | |
1264 addu $25,$1 | |
1265 addu $2,$25 | |
1266 sltu $3,$2,$25 | |
1267 mflo $24 | |
1268 mfhi $25 | |
1269 addu $7,$24 | |
1270 sltu $1,$7,$24 | |
1271 multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); | |
1272 addu $25,$1 | |
1273 addu $2,$25 | |
1274 sltu $1,$2,$25 | |
1275 addu $3,$1 | |
1276 mflo $24 | |
1277 mfhi $25 | |
1278 addu $7,$24 | |
1279 sltu $1,$7,$24 | |
1280 multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); | |
1281 addu $25,$1 | |
1282 addu $2,$25 | |
1283 sltu $1,$2,$25 | |
1284 addu $3,$1 | |
1285 mflo $24 | |
1286 mfhi $25 | |
1287 addu $7,$24 | |
1288 sltu $1,$7,$24 | |
1289 multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); | |
1290 addu $25,$1 | |
1291 addu $2,$25 | |
1292 sltu $1,$2,$25 | |
1293 addu $3,$1 | |
1294 sw $7,11*4($4) # r[11]=c3; | |
1295 | |
1296 mflo $24 | |
1297 mfhi $25 | |
1298 addu $2,$24 | |
1299 sltu $1,$2,$24 | |
1300 multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); | |
1301 addu $25,$1 | |
1302 addu $3,$25 | |
1303 sltu $7,$3,$25 | |
1304 mflo $24 | |
1305 mfhi $25 | |
1306 addu $2,$24 | |
1307 sltu $1,$2,$24 | |
1308 multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); | |
1309 addu $25,$1 | |
1310 addu $3,$25 | |
1311 sltu $1,$3,$25 | |
1312 addu $7,$1 | |
1313 mflo $24 | |
1314 mfhi $25 | |
1315 addu $2,$24 | |
1316 sltu $1,$2,$24 | |
1317 multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); | |
1318 addu $25,$1 | |
1319 addu $3,$25 | |
1320 sltu $1,$3,$25 | |
1321 addu $7,$1 | |
1322 sw $2,12*4($4) # r[12]=c1; | |
1323 | |
1324 mflo $24 | |
1325 mfhi $25 | |
1326 addu $3,$24 | |
1327 sltu $1,$3,$24 | |
1328 multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); | |
1329 addu $25,$1 | |
1330 addu $7,$25 | |
1331 sltu $2,$7,$25 | |
1332 mflo $24 | |
1333 mfhi $25 | |
1334 addu $3,$24 | |
1335 sltu $1,$3,$24 | |
1336 multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); | |
1337 addu $25,$1 | |
1338 addu $7,$25 | |
1339 sltu $1,$7,$25 | |
1340 addu $2,$1 | |
1341 sw $3,13*4($4) # r[13]=c2; | |
1342 | |
# Last product a[7]*b[7]: its low word completes r[14] and its high word
# (plus carry) becomes r[15]; no further carry is tracked.
1343 mflo $24 | |
1344 mfhi $25 | |
1345 addu $7,$24 | |
1346 sltu $1,$7,$24 | |
1347 addu $25,$1 | |
1348 addu $2,$25 | |
1349 sw $7,14*4($4) # r[14]=c3; | |
1350 sw $2,15*4($4) # r[15]=c1; | |
1351 | |
1352 .set noreorder | |
# Epilogue: restore $16-$21; the frame is released in the return delay slot.
1353 lw $21,5*4($29) | |
1354 lw $20,4*4($29) | |
1355 lw $19,3*4($29) | |
1356 lw $18,2*4($29) | |
1357 lw $17,1*4($29) | |
1358 lw $16,0*4($29) | |
1359 jr $31 | |
1360 addu $29,6*4 | |
1361 .end bn_mul_comba8 | |
1362 | |
# ----------------------------------------------------------------------
# bn_mul_comba4 -- column-wise (comba) 4x4 word multiplication.
#
#   $4  r : eight 32-bit result words are stored at 0($4)..28($4)
#   $5  a : four 32-bit words are loaded into $12,$13,$14,$15
#   $6  b : four 32-bit words are loaded into $8,$9,$10,$11
#
# $2, $3 and $7 rotate through the roles of low/middle/high word of the
# running column sum.  $24/$25 receive LO/HI of each product and $1
# holds the carry produced by sltu.  Every multu is issued before the
# additions that consume the previous product, so the instruction order
# below is significant.
# ----------------------------------------------------------------------
	.align	5
	.globl	bn_mul_comba4
	.ent	bn_mul_comba4
bn_mul_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$8,0($6)		# b[0]
	lw	$13,4($5)		# a[1]
	lw	$14,8($5)		# a[2]
	multu	$12,$8			# start a[0]*b[0]
	lw	$15,12($5)		# a[3]
	lw	$9,4($6)		# b[1]
	lw	$10,8($6)		# b[2]
	lw	$11,12($6)		# b[3]
	mflo	$2
	mfhi	$3
	sw	$2,0($4)		# r[0]

	# ---- column 1: low=$3 mid=$7 high=$2 ----
	multu	$12,$9			# start a[0]*b[1]
	mflo	$24
	mfhi	$25
	addu	$3,$3,$24
	sltu	$1,$3,$24
	multu	$13,$8			# start a[1]*b[0]
	addu	$7,$25,$1		# middle word starts as hi + carry
	mflo	$24
	mfhi	$25
	addu	$3,$3,$24
	sltu	$1,$3,$24
	multu	$14,$8			# start a[2]*b[0]
	addu	$25,$25,$1
	addu	$7,$7,$25
	sltu	$2,$7,$25		# high word starts as the carry
	sw	$3,4($4)		# r[1]

	# ---- column 2: low=$7 mid=$2 high=$3 ----
	mflo	$24			# a[2]*b[0]
	mfhi	$25
	addu	$7,$7,$24
	sltu	$1,$7,$24
	multu	$13,$9			# start a[1]*b[1]
	addu	$25,$25,$1
	addu	$2,$2,$25		# no carry is taken out of this add
	mflo	$24			# a[1]*b[1]
	mfhi	$25
	addu	$7,$7,$24
	sltu	$1,$7,$24
	multu	$12,$10			# start a[0]*b[2]
	addu	$25,$25,$1
	addu	$2,$2,$25
	sltu	$3,$2,$25		# high word starts as the carry
	mflo	$24			# a[0]*b[2]
	mfhi	$25
	addu	$7,$7,$24
	sltu	$1,$7,$24
	multu	$12,$11			# start a[0]*b[3]
	addu	$25,$25,$1
	addu	$2,$2,$25
	sltu	$1,$2,$25
	addu	$3,$3,$1
	sw	$7,8($4)		# r[2]

	# ---- column 3: low=$2 mid=$3 high=$7 ----
	mflo	$24			# a[0]*b[3]
	mfhi	$25
	addu	$2,$2,$24
	sltu	$1,$2,$24
	multu	$13,$10			# start a[1]*b[2]
	addu	$25,$25,$1
	addu	$3,$3,$25
	sltu	$7,$3,$25		# high word starts as the carry
	mflo	$24			# a[1]*b[2]
	mfhi	$25
	addu	$2,$2,$24
	sltu	$1,$2,$24
	multu	$14,$9			# start a[2]*b[1]
	addu	$25,$25,$1
	addu	$3,$3,$25
	sltu	$1,$3,$25
	addu	$7,$7,$1
	mflo	$24			# a[2]*b[1]
	mfhi	$25
	addu	$2,$2,$24
	sltu	$1,$2,$24
	multu	$15,$8			# start a[3]*b[0]
	addu	$25,$25,$1
	addu	$3,$3,$25
	sltu	$1,$3,$25
	addu	$7,$7,$1
	mflo	$24			# a[3]*b[0]
	mfhi	$25
	addu	$2,$2,$24
	sltu	$1,$2,$24
	multu	$15,$9			# start a[3]*b[1]
	addu	$25,$25,$1
	addu	$3,$3,$25
	sltu	$1,$3,$25
	addu	$7,$7,$1
	sw	$2,12($4)		# r[3]

	# ---- column 4: low=$3 mid=$7 high=$2 ----
	mflo	$24			# a[3]*b[1]
	mfhi	$25
	addu	$3,$3,$24
	sltu	$1,$3,$24
	multu	$14,$10			# start a[2]*b[2]
	addu	$25,$25,$1
	addu	$7,$7,$25
	sltu	$2,$7,$25		# high word starts as the carry
	mflo	$24			# a[2]*b[2]
	mfhi	$25
	addu	$3,$3,$24
	sltu	$1,$3,$24
	multu	$13,$11			# start a[1]*b[3]
	addu	$25,$25,$1
	addu	$7,$7,$25
	sltu	$1,$7,$25
	addu	$2,$2,$1
	mflo	$24			# a[1]*b[3]
	mfhi	$25
	addu	$3,$3,$24
	sltu	$1,$3,$24
	multu	$14,$11			# start a[2]*b[3]
	addu	$25,$25,$1
	addu	$7,$7,$25
	sltu	$1,$7,$25
	addu	$2,$2,$1
	sw	$3,16($4)		# r[4]

	# ---- column 5: low=$7 mid=$2 high=$3 ----
	mflo	$24			# a[2]*b[3]
	mfhi	$25
	addu	$7,$7,$24
	sltu	$1,$7,$24
	multu	$15,$10			# start a[3]*b[2]
	addu	$25,$25,$1
	addu	$2,$2,$25
	sltu	$3,$2,$25		# high word starts as the carry
	mflo	$24			# a[3]*b[2]
	mfhi	$25
	addu	$7,$7,$24
	sltu	$1,$7,$24
	multu	$15,$11			# start a[3]*b[3]
	addu	$25,$25,$1
	addu	$2,$2,$25
	sltu	$1,$2,$25
	addu	$3,$3,$1
	sw	$7,20($4)		# r[5]

	# ---- column 6 and the final carry word ----
	mflo	$24			# a[3]*b[3]
	mfhi	$25
	addu	$2,$2,$24
	sltu	$1,$2,$24
	addu	$25,$25,$1
	addu	$3,$3,$25
	sw	$2,24($4)		# r[6]
	sw	$3,28($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
	.end	bn_mul_comba4
1521 | |
# ----------------------------------------------------------------------
# bn_sqr_comba8 -- column-wise (comba) squaring of an 8-word number.
#
#   $4  r : sixteen 32-bit result words are stored at 0($4)..60($4)
#   $5  a : eight 32-bit words are loaded:
#           a[0..3] -> $12,$13,$14,$15   a[4..7] -> $8,$9,$10,$11
#
# $2, $3 and $7 rotate through the roles of low/middle/high word of the
# running column sum.  $24/$25 receive LO/HI of each product, $1 holds
# carries and $6 is scratch.  Every multu is issued before the additions
# that consume the previous product.
#
# Cross products a[i]*a[j] (i != j) are added doubled.  The product is
# shifted left by one: the bit leaving HI goes to the high accumulator
# word, the bit leaving LO enters HI.  The doubled HI can be 0xFFFFFFFF,
# so adding the low-word carry to it may wrap to zero.  That wrap is
# detected with "sltu $1,$25,$1" and added to the high accumulator word;
# without it the carry is silently lost (for example a[0]=0xFFFFFFFE,
# a[1]=0x80000001 gave r[3]=0x40000001 instead of 0x40000002).
# ----------------------------------------------------------------------
	.align	5
	.globl	bn_sqr_comba8
	.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]

	multu	$12,$12			# start a[0]*a[0]
	lw	$8,4*4($5)		# a[4]
	lw	$9,5*4($5)		# a[5]
	lw	$10,6*4($5)		# a[6]
	lw	$11,7*4($5)		# a[7]
	mflo	$2
	mfhi	$3
	sw	$2,0($4)		# r[0]

	# ---- column 1: low=$3 mid=$7 high=$2 ----
	multu	$12,$13			# start a[0]*a[1]
	mflo	$24			# += 2*a[0]*a[1]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$14,$12			# start a[2]*a[0]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$7,$25,$1		# middle word = doubled HI + carry
	sltu	$1,$7,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	sw	$3,4($4)		# r[1]

	# ---- column 2: low=$7 mid=$2 high=$3 ----
	mflo	$24			# += 2*a[2]*a[0]
	mfhi	$25
	slt	$3,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$13,$13			# start a[1]*a[1]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += a[1]*a[1]
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$15			# start a[0]*a[3]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# ---- column 3: low=$2 mid=$3 high=$7 ----
	mflo	$24			# += 2*a[0]*a[3]
	mfhi	$25
	slt	$7,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$13,$14			# start a[1]*a[2]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += 2*a[1]*a[2]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$8,$12			# start a[4]*a[0]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)		# r[3]

	# ---- column 4: low=$3 mid=$7 high=$2 ----
	mflo	$24			# += 2*a[4]*a[0]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$15,$13			# start a[3]*a[1]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += 2*a[3]*a[1]
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$14,$14			# start a[2]*a[2]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += a[2]*a[2]
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$9			# start a[0]*a[5]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# ---- column 5: low=$7 mid=$2 high=$3 ----
	mflo	$24			# += 2*a[0]*a[5]
	mfhi	$25
	slt	$3,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$13,$8			# start a[1]*a[4]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += 2*a[1]*a[4]
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$14,$15			# start a[2]*a[3]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += 2*a[2]*a[3]
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$10,$12			# start a[6]*a[0]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)		# r[5]

	# ---- column 6: low=$2 mid=$3 high=$7 ----
	mflo	$24			# += 2*a[6]*a[0]
	mfhi	$25
	slt	$7,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$9,$13			# start a[5]*a[1]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += 2*a[5]*a[1]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$8,$14			# start a[4]*a[2]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += 2*a[4]*a[2]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$15,$15			# start a[3]*a[3]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += a[3]*a[3]
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$11			# start a[0]*a[7]
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)		# r[6]

	# ---- column 7: low=$3 mid=$7 high=$2 ----
	mflo	$24			# += 2*a[0]*a[7]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$13,$10			# start a[1]*a[6]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += 2*a[1]*a[6]
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$14,$9			# start a[2]*a[5]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += 2*a[2]*a[5]
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$15,$8			# start a[3]*a[4]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += 2*a[3]*a[4]
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$11,$13			# start a[7]*a[1]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)		# r[7]

	# ---- column 8: low=$7 mid=$2 high=$3 ----
	mflo	$24			# += 2*a[7]*a[1]
	mfhi	$25
	slt	$3,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$10,$14			# start a[6]*a[2]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += 2*a[6]*a[2]
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$9,$15			# start a[5]*a[3]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += 2*a[5]*a[3]
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$8,$8			# start a[4]*a[4]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += a[4]*a[4]
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$11			# start a[2]*a[7]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)		# r[8]

	# ---- column 9: low=$2 mid=$3 high=$7 ----
	mflo	$24			# += 2*a[2]*a[7]
	mfhi	$25
	slt	$7,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$15,$10			# start a[3]*a[6]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += 2*a[3]*a[6]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$8,$9			# start a[4]*a[5]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += 2*a[4]*a[5]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$11,$15			# start a[7]*a[3]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,9*4($4)		# r[9]

	# ---- column 10: low=$3 mid=$7 high=$2 ----
	mflo	$24			# += 2*a[7]*a[3]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$10,$8			# start a[6]*a[4]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += 2*a[6]*a[4]
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$9,$9			# start a[5]*a[5]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += a[5]*a[5]
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$8,$11			# start a[4]*a[7]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)		# r[10]

	# ---- column 11: low=$7 mid=$2 high=$3 ----
	mflo	$24			# += 2*a[4]*a[7]
	mfhi	$25
	slt	$3,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$9,$10			# start a[5]*a[6]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += 2*a[5]*a[6]
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$11,$9			# start a[7]*a[5]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)		# r[11]

	# ---- column 12: low=$2 mid=$3 high=$7 ----
	mflo	$24			# += 2*a[7]*a[5]
	mfhi	$25
	slt	$7,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$10,$10			# start a[6]*a[6]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += a[6]*a[6]
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$10,$11			# start a[6]*a[7]
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)		# r[12]

	# ---- column 13: low=$3 mid=$7 high=$2 ----
	mflo	$24			# += 2*a[6]*a[7]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$11,$11			# start a[7]*a[7]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)		# r[13]

	# ---- column 14 and the final carry word ----
	mflo	$24			# += a[7]*a[7]
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)		# r[14]
	sw	$2,15*4($4)		# r[15]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
	.end	bn_sqr_comba8
2039 | |
# ----------------------------------------------------------------------
# bn_sqr_comba4 -- column-wise (comba) squaring of a 4-word number.
#
#   $4  r : eight 32-bit result words are stored at 0($4)..28($4)
#   $5  a : four 32-bit words are loaded into $12,$13,$14,$15
#
# $2, $3 and $7 rotate through the roles of low/middle/high word of the
# running column sum.  $24/$25 receive LO/HI of each product, $1 holds
# carries and $6 is scratch.  Every multu is issued before the additions
# that consume the previous product.
#
# Cross products a[i]*a[j] (i != j) are added doubled.  The product is
# shifted left by one: the bit leaving HI goes to the high accumulator
# word, the bit leaving LO enters HI.  The doubled HI can be 0xFFFFFFFF,
# so adding the low-word carry to it may wrap to zero.  That wrap is
# detected with "sltu $1,$25,$1" and added to the high accumulator word;
# without it the carry is silently lost (for example a[0]=0xFFFFFFFE,
# a[1]=0x80000001 gave r[3]=0x40000001 instead of 0x40000002).
# ----------------------------------------------------------------------
	.align	5
	.globl	bn_sqr_comba4
	.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	multu	$12,$12			# start a[0]*a[0]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]
	mflo	$2
	mfhi	$3
	sw	$2,0($4)		# r[0]

	# ---- column 1: low=$3 mid=$7 high=$2 ----
	multu	$12,$13			# start a[0]*a[1]
	mflo	$24			# += 2*a[0]*a[1]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$14,$12			# start a[2]*a[0]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$7,$25,$1		# middle word = doubled HI + carry
	sltu	$1,$7,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	sw	$3,4($4)		# r[1]

	# ---- column 2: low=$7 mid=$2 high=$3 ----
	mflo	$24			# += 2*a[2]*a[0]
	mfhi	$25
	slt	$3,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$13,$13			# start a[1]*a[1]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# += a[1]*a[1]
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$15			# start a[0]*a[3]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# ---- column 3: low=$2 mid=$3 high=$7 ----
	mflo	$24			# += 2*a[0]*a[3]
	mfhi	$25
	slt	$7,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$13,$14			# start a[1]*a[2]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# += 2*a[1]*a[2]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$15,$13			# start a[3]*a[1]
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)		# r[3]

	# ---- column 4: low=$3 mid=$7 high=$2 ----
	mflo	$24			# += 2*a[3]*a[1]
	mfhi	$25
	slt	$2,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$14,$14			# start a[2]*a[2]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# += a[2]*a[2]
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$15			# start a[2]*a[3]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# ---- column 5: low=$7 mid=$2 high=$3 ----
	mflo	$24			# += 2*a[2]*a[3]
	mfhi	$25
	slt	$3,$25,$0		# high word = bit shifted out of HI
	sll	$25,1
	multu	$15,$15			# start a[3]*a[3]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	sltu	$1,$25,$1		# did doubled HI + carry wrap?
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)		# r[5]

	# ---- column 6 and the final carry word ----
	mflo	$24			# += a[3]*a[3]
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]
	sw	$3,7*4($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
	.end	bn_sqr_comba4
OLD | NEW |