Index: openssl/crypto/bn/asm/bn-mips.S |
diff --git a/openssl/crypto/bn/asm/bn-mips.S b/openssl/crypto/bn/asm/bn-mips.S |
deleted file mode 100644 |
index 229c70916fdaf5630939a036d1ca0b4cf07b80c9..0000000000000000000000000000000000000000 |
--- a/openssl/crypto/bn/asm/bn-mips.S |
+++ /dev/null |
@@ -1,2177 +0,0 @@ |
-.set mips2 |
-.rdata |
-.asciiz "mips3.s, Version 1.2" |
-.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" |
- |
-.text |
-.set noat |
- |
-.align 5 |
-.globl bn_mul_add_words |
-.ent bn_mul_add_words |
-bn_mul_add_words: |
- .set noreorder |
- bgtz $6,bn_mul_add_words_internal |
- move $2,$0 |
- jr $31 |
- move $4,$2 |
-.end bn_mul_add_words |
- |
-.align 5 |
-.ent bn_mul_add_words_internal |
-bn_mul_add_words_internal: |
- .set reorder |
- li $3,-4 |
- and $8,$6,$3 |
- lw $12,0($5) |
- beqz $8,.L_bn_mul_add_words_tail |
- |
-.L_bn_mul_add_words_loop: |
- multu $12,$7 |
- lw $13,0($4) |
- lw $14,4($5) |
- lw $15,4($4) |
- lw $8,2*4($5) |
- lw $9,2*4($4) |
- addu $13,$2 |
- sltu $2,$13,$2 # All manuals say it "compares 32-bit |
- # values", but it seems to work fine |
- # even on 64-bit registers. |
- mflo $1 |
- mfhi $12 |
- addu $13,$1 |
- addu $2,$12 |
- multu $14,$7 |
- sltu $1,$13,$1 |
- sw $13,0($4) |
- addu $2,$1 |
- |
- lw $10,3*4($5) |
- lw $11,3*4($4) |
- addu $15,$2 |
- sltu $2,$15,$2 |
- mflo $1 |
- mfhi $14 |
- addu $15,$1 |
- addu $2,$14 |
- multu $8,$7 |
- sltu $1,$15,$1 |
- sw $15,4($4) |
- addu $2,$1 |
- |
- subu $6,4 |
- addu $4,4*4 |
- addu $5,4*4 |
- addu $9,$2 |
- sltu $2,$9,$2 |
- mflo $1 |
- mfhi $8 |
- addu $9,$1 |
- addu $2,$8 |
- multu $10,$7 |
- sltu $1,$9,$1 |
- sw $9,-2*4($4) |
- addu $2,$1 |
- |
- |
- and $8,$6,$3 |
- addu $11,$2 |
- sltu $2,$11,$2 |
- mflo $1 |
- mfhi $10 |
- addu $11,$1 |
- addu $2,$10 |
- sltu $1,$11,$1 |
- sw $11,-4($4) |
- addu $2,$1 |
- .set noreorder |
- bgtzl $8,.L_bn_mul_add_words_loop |
- lw $12,0($5) |
- |
- beqz $6,.L_bn_mul_add_words_return |
- nop |
- |
-.L_bn_mul_add_words_tail: |
- .set reorder |
- lw $12,0($5) |
- multu $12,$7 |
- lw $13,0($4) |
- subu $6,1 |
- addu $13,$2 |
- sltu $2,$13,$2 |
- mflo $1 |
- mfhi $12 |
- addu $13,$1 |
- addu $2,$12 |
- sltu $1,$13,$1 |
- sw $13,0($4) |
- addu $2,$1 |
- beqz $6,.L_bn_mul_add_words_return |
- |
- lw $12,4($5) |
- multu $12,$7 |
- lw $13,4($4) |
- subu $6,1 |
- addu $13,$2 |
- sltu $2,$13,$2 |
- mflo $1 |
- mfhi $12 |
- addu $13,$1 |
- addu $2,$12 |
- sltu $1,$13,$1 |
- sw $13,4($4) |
- addu $2,$1 |
- beqz $6,.L_bn_mul_add_words_return |
- |
- lw $12,2*4($5) |
- multu $12,$7 |
- lw $13,2*4($4) |
- addu $13,$2 |
- sltu $2,$13,$2 |
- mflo $1 |
- mfhi $12 |
- addu $13,$1 |
- addu $2,$12 |
- sltu $1,$13,$1 |
- sw $13,2*4($4) |
- addu $2,$1 |
- |
-.L_bn_mul_add_words_return: |
- .set noreorder |
- jr $31 |
- move $4,$2 |
-.end bn_mul_add_words_internal |
- |
-.align 5 |
-.globl bn_mul_words |
-.ent bn_mul_words |
-bn_mul_words: |
- .set noreorder |
- bgtz $6,bn_mul_words_internal |
- move $2,$0 |
- jr $31 |
- move $4,$2 |
-.end bn_mul_words |
- |
-.align 5 |
-.ent bn_mul_words_internal |
-bn_mul_words_internal: |
- .set reorder |
- li $3,-4 |
- and $8,$6,$3 |
- lw $12,0($5) |
- beqz $8,.L_bn_mul_words_tail |
- |
-.L_bn_mul_words_loop: |
- multu $12,$7 |
- lw $14,4($5) |
- lw $8,2*4($5) |
- lw $10,3*4($5) |
- mflo $1 |
- mfhi $12 |
- addu $2,$1 |
- sltu $13,$2,$1 |
- multu $14,$7 |
- sw $2,0($4) |
- addu $2,$13,$12 |
- |
- subu $6,4 |
- addu $4,4*4 |
- addu $5,4*4 |
- mflo $1 |
- mfhi $14 |
- addu $2,$1 |
- sltu $15,$2,$1 |
- multu $8,$7 |
- sw $2,-3*4($4) |
- addu $2,$15,$14 |
- |
- mflo $1 |
- mfhi $8 |
- addu $2,$1 |
- sltu $9,$2,$1 |
- multu $10,$7 |
- sw $2,-2*4($4) |
- addu $2,$9,$8 |
- |
- and $8,$6,$3 |
- mflo $1 |
- mfhi $10 |
- addu $2,$1 |
- sltu $11,$2,$1 |
- sw $2,-4($4) |
- addu $2,$11,$10 |
- .set noreorder |
- bgtzl $8,.L_bn_mul_words_loop |
- lw $12,0($5) |
- |
- beqz $6,.L_bn_mul_words_return |
- nop |
- |
-.L_bn_mul_words_tail: |
- .set reorder |
- lw $12,0($5) |
- multu $12,$7 |
- subu $6,1 |
- mflo $1 |
- mfhi $12 |
- addu $2,$1 |
- sltu $13,$2,$1 |
- sw $2,0($4) |
- addu $2,$13,$12 |
- beqz $6,.L_bn_mul_words_return |
- |
- lw $12,4($5) |
- multu $12,$7 |
- subu $6,1 |
- mflo $1 |
- mfhi $12 |
- addu $2,$1 |
- sltu $13,$2,$1 |
- sw $2,4($4) |
- addu $2,$13,$12 |
- beqz $6,.L_bn_mul_words_return |
- |
- lw $12,2*4($5) |
- multu $12,$7 |
- mflo $1 |
- mfhi $12 |
- addu $2,$1 |
- sltu $13,$2,$1 |
- sw $2,2*4($4) |
- addu $2,$13,$12 |
- |
-.L_bn_mul_words_return: |
- .set noreorder |
- jr $31 |
- move $4,$2 |
-.end bn_mul_words_internal |
- |
-.align 5 |
-.globl bn_sqr_words |
-.ent bn_sqr_words |
-bn_sqr_words: |
- .set noreorder |
- bgtz $6,bn_sqr_words_internal |
- move $2,$0 |
- jr $31 |
- move $4,$2 |
-.end bn_sqr_words |
- |
-.align 5 |
-.ent bn_sqr_words_internal |
-bn_sqr_words_internal: |
- .set reorder |
- li $3,-4 |
- and $8,$6,$3 |
- lw $12,0($5) |
- beqz $8,.L_bn_sqr_words_tail |
- |
-.L_bn_sqr_words_loop: |
- multu $12,$12 |
- lw $14,4($5) |
- lw $8,2*4($5) |
- lw $10,3*4($5) |
- mflo $13 |
- mfhi $12 |
- sw $13,0($4) |
- sw $12,4($4) |
- |
- multu $14,$14 |
- subu $6,4 |
- addu $4,8*4 |
- addu $5,4*4 |
- mflo $15 |
- mfhi $14 |
- sw $15,-6*4($4) |
- sw $14,-5*4($4) |
- |
- multu $8,$8 |
- mflo $9 |
- mfhi $8 |
- sw $9,-4*4($4) |
- sw $8,-3*4($4) |
- |
- |
- multu $10,$10 |
- and $8,$6,$3 |
- mflo $11 |
- mfhi $10 |
- sw $11,-2*4($4) |
- sw $10,-4($4) |
- |
- .set noreorder |
- bgtzl $8,.L_bn_sqr_words_loop |
- lw $12,0($5) |
- |
- beqz $6,.L_bn_sqr_words_return |
- nop |
- |
-.L_bn_sqr_words_tail: |
- .set reorder |
- lw $12,0($5) |
- multu $12,$12 |
- subu $6,1 |
- mflo $13 |
- mfhi $12 |
- sw $13,0($4) |
- sw $12,4($4) |
- beqz $6,.L_bn_sqr_words_return |
- |
- lw $12,4($5) |
- multu $12,$12 |
- subu $6,1 |
- mflo $13 |
- mfhi $12 |
- sw $13,2*4($4) |
- sw $12,3*4($4) |
- beqz $6,.L_bn_sqr_words_return |
- |
- lw $12,2*4($5) |
- multu $12,$12 |
- mflo $13 |
- mfhi $12 |
- sw $13,4*4($4) |
- sw $12,5*4($4) |
- |
-.L_bn_sqr_words_return: |
- .set noreorder |
- jr $31 |
- move $4,$2 |
- |
-.end bn_sqr_words_internal |
- |
-.align 5 |
-.globl bn_add_words |
-.ent bn_add_words |
-bn_add_words: |
- .set noreorder |
- bgtz $7,bn_add_words_internal |
- move $2,$0 |
- jr $31 |
- move $4,$2 |
-.end bn_add_words |
- |
-.align 5 |
-.ent bn_add_words_internal |
-bn_add_words_internal: |
- .set reorder |
- li $3,-4 |
- and $1,$7,$3 |
- lw $12,0($5) |
- beqz $1,.L_bn_add_words_tail |
- |
-.L_bn_add_words_loop: |
- lw $8,0($6) |
- subu $7,4 |
- lw $13,4($5) |
- and $1,$7,$3 |
- lw $14,2*4($5) |
- addu $6,4*4 |
- lw $15,3*4($5) |
- addu $4,4*4 |
- lw $9,-3*4($6) |
- addu $5,4*4 |
- lw $10,-2*4($6) |
- lw $11,-4($6) |
- addu $8,$12 |
- sltu $24,$8,$12 |
- addu $12,$8,$2 |
- sltu $2,$12,$8 |
- sw $12,-4*4($4) |
- addu $2,$24 |
- |
- addu $9,$13 |
- sltu $25,$9,$13 |
- addu $13,$9,$2 |
- sltu $2,$13,$9 |
- sw $13,-3*4($4) |
- addu $2,$25 |
- |
- addu $10,$14 |
- sltu $24,$10,$14 |
- addu $14,$10,$2 |
- sltu $2,$14,$10 |
- sw $14,-2*4($4) |
- addu $2,$24 |
- |
- addu $11,$15 |
- sltu $25,$11,$15 |
- addu $15,$11,$2 |
- sltu $2,$15,$11 |
- sw $15,-4($4) |
- addu $2,$25 |
- |
- .set noreorder |
- bgtzl $1,.L_bn_add_words_loop |
- lw $12,0($5) |
- |
- beqz $7,.L_bn_add_words_return |
- nop |
- |
-.L_bn_add_words_tail: |
- .set reorder |
- lw $12,0($5) |
- lw $8,0($6) |
- addu $8,$12 |
- subu $7,1 |
- sltu $24,$8,$12 |
- addu $12,$8,$2 |
- sltu $2,$12,$8 |
- sw $12,0($4) |
- addu $2,$24 |
- beqz $7,.L_bn_add_words_return |
- |
- lw $13,4($5) |
- lw $9,4($6) |
- addu $9,$13 |
- subu $7,1 |
- sltu $25,$9,$13 |
- addu $13,$9,$2 |
- sltu $2,$13,$9 |
- sw $13,4($4) |
- addu $2,$25 |
- beqz $7,.L_bn_add_words_return |
- |
- lw $14,2*4($5) |
- lw $10,2*4($6) |
- addu $10,$14 |
- sltu $24,$10,$14 |
- addu $14,$10,$2 |
- sltu $2,$14,$10 |
- sw $14,2*4($4) |
- addu $2,$24 |
- |
-.L_bn_add_words_return: |
- .set noreorder |
- jr $31 |
- move $4,$2 |
- |
-.end bn_add_words_internal |
- |
-.align 5 |
-.globl bn_sub_words |
-.ent bn_sub_words |
-bn_sub_words: |
- .set noreorder |
- bgtz $7,bn_sub_words_internal |
- move $2,$0 |
- jr $31 |
- move $4,$0 |
-.end bn_sub_words |
- |
-.align 5 |
-.ent bn_sub_words_internal |
-bn_sub_words_internal: |
- .set reorder |
- li $3,-4 |
- and $1,$7,$3 |
- lw $12,0($5) |
- beqz $1,.L_bn_sub_words_tail |
- |
-.L_bn_sub_words_loop: |
- lw $8,0($6) |
- subu $7,4 |
- lw $13,4($5) |
- and $1,$7,$3 |
- lw $14,2*4($5) |
- addu $6,4*4 |
- lw $15,3*4($5) |
- addu $4,4*4 |
- lw $9,-3*4($6) |
- addu $5,4*4 |
- lw $10,-2*4($6) |
- lw $11,-4($6) |
- sltu $24,$12,$8 |
- subu $8,$12,$8 |
- subu $12,$8,$2 |
- sgtu $2,$12,$8 |
- sw $12,-4*4($4) |
- addu $2,$24 |
- |
- sltu $25,$13,$9 |
- subu $9,$13,$9 |
- subu $13,$9,$2 |
- sgtu $2,$13,$9 |
- sw $13,-3*4($4) |
- addu $2,$25 |
- |
- |
- sltu $24,$14,$10 |
- subu $10,$14,$10 |
- subu $14,$10,$2 |
- sgtu $2,$14,$10 |
- sw $14,-2*4($4) |
- addu $2,$24 |
- |
- sltu $25,$15,$11 |
- subu $11,$15,$11 |
- subu $15,$11,$2 |
- sgtu $2,$15,$11 |
- sw $15,-4($4) |
- addu $2,$25 |
- |
- .set noreorder |
- bgtzl $1,.L_bn_sub_words_loop |
- lw $12,0($5) |
- |
- beqz $7,.L_bn_sub_words_return |
- nop |
- |
-.L_bn_sub_words_tail: |
- .set reorder |
- lw $12,0($5) |
- lw $8,0($6) |
- subu $7,1 |
- sltu $24,$12,$8 |
- subu $8,$12,$8 |
- subu $12,$8,$2 |
- sgtu $2,$12,$8 |
- sw $12,0($4) |
- addu $2,$24 |
- beqz $7,.L_bn_sub_words_return |
- |
- lw $13,4($5) |
- subu $7,1 |
- lw $9,4($6) |
- sltu $25,$13,$9 |
- subu $9,$13,$9 |
- subu $13,$9,$2 |
- sgtu $2,$13,$9 |
- sw $13,4($4) |
- addu $2,$25 |
- beqz $7,.L_bn_sub_words_return |
- |
- lw $14,2*4($5) |
- lw $10,2*4($6) |
- sltu $24,$14,$10 |
- subu $10,$14,$10 |
- subu $14,$10,$2 |
- sgtu $2,$14,$10 |
- sw $14,2*4($4) |
- addu $2,$24 |
- |
-.L_bn_sub_words_return: |
- .set noreorder |
- jr $31 |
- move $4,$2 |
-.end bn_sub_words_internal |
- |
-.align 5 |
-.globl bn_div_3_words |
-.ent bn_div_3_words |
-bn_div_3_words: |
- .set noreorder |
- move $7,$4 # we know that bn_div_words does not |
- # touch $7, $10, $11 and preserves $6 |
- # so that we can save two arguments |
- # and return address in registers |
- # instead of stack:-) |
- |
- lw $4,($7) |
- move $10,$5 |
- bne $4,$6,bn_div_3_words_internal |
- lw $5,-4($7) |
- li $2,-1 |
- jr $31 |
- move $4,$2 |
-.end bn_div_3_words |
- |
-.align 5 |
-.ent bn_div_3_words_internal |
-bn_div_3_words_internal: |
- .set reorder |
- move $11,$31 |
- bal bn_div_words_internal |
- move $31,$11 |
- multu $10,$2 |
- lw $14,-2*4($7) |
- move $8,$0 |
- mfhi $13 |
- mflo $12 |
- sltu $24,$13,$5 |
-.L_bn_div_3_words_inner_loop: |
- bnez $24,.L_bn_div_3_words_inner_loop_done |
- sgeu $1,$14,$12 |
- seq $25,$13,$5 |
- and $1,$25 |
- sltu $15,$12,$10 |
- addu $5,$6 |
- subu $13,$15 |
- subu $12,$10 |
- sltu $24,$13,$5 |
- sltu $8,$5,$6 |
- or $24,$8 |
- .set noreorder |
- beqzl $1,.L_bn_div_3_words_inner_loop |
- subu $2,1 |
- .set reorder |
-.L_bn_div_3_words_inner_loop_done: |
- .set noreorder |
- jr $31 |
- move $4,$2 |
-.end bn_div_3_words_internal |
- |
-.align 5 |
-.globl bn_div_words |
-.ent bn_div_words |
-bn_div_words: |
- .set noreorder |
- bnez $6,bn_div_words_internal |
- li $2,-1 # I would rather signal div-by-zero |
- # which can be done with 'break 7' |
- jr $31 |
- move $4,$2 |
-.end bn_div_words |
- |
-.align 5 |
-.ent bn_div_words_internal |
-bn_div_words_internal: |
- move $3,$0 |
- bltz $6,.L_bn_div_words_body |
- move $25,$3 |
- sll $6,1 |
- bgtz $6,.-4 |
- addu $25,1 |
- |
- .set reorder |
- negu $13,$25 |
- li $14,-1 |
- sll $14,$13 |
- and $14,$4 |
- srl $1,$5,$13 |
- .set noreorder |
- bnezl $14,.+8 |
- break 6 # signal overflow |
- .set reorder |
- sll $4,$25 |
- sll $5,$25 |
- or $4,$1 |
-.L_bn_div_words_body: |
- srl $3,$6,4*4 # bits |
- sgeu $1,$4,$6 |
- .set noreorder |
- bnezl $1,.+8 |
- subu $4,$6 |
- .set reorder |
- |
- li $8,-1 |
- srl $9,$4,4*4 # bits |
- srl $8,4*4 # q=0xffffffff |
- beq $3,$9,.L_bn_div_words_skip_div1 |
- divu $0,$4,$3 |
- mflo $8 |
-.L_bn_div_words_skip_div1: |
- multu $6,$8 |
- sll $15,$4,4*4 # bits |
- srl $1,$5,4*4 # bits |
- or $15,$1 |
- mflo $12 |
- mfhi $13 |
-.L_bn_div_words_inner_loop1: |
- sltu $14,$15,$12 |
- seq $24,$9,$13 |
- sltu $1,$9,$13 |
- and $14,$24 |
- sltu $2,$12,$6 |
- or $1,$14 |
- .set noreorder |
- beqz $1,.L_bn_div_words_inner_loop1_done |
- subu $13,$2 |
- subu $12,$6 |
- b .L_bn_div_words_inner_loop1 |
- subu $8,1 |
- .set reorder |
-.L_bn_div_words_inner_loop1_done: |
- |
- sll $5,4*4 # bits |
- subu $4,$15,$12 |
- sll $2,$8,4*4 # bits |
- |
- li $8,-1 |
- srl $9,$4,4*4 # bits |
- srl $8,4*4 # q=0xffffffff |
- beq $3,$9,.L_bn_div_words_skip_div2 |
- divu $0,$4,$3 |
- mflo $8 |
-.L_bn_div_words_skip_div2: |
- multu $6,$8 |
- sll $15,$4,4*4 # bits |
- srl $1,$5,4*4 # bits |
- or $15,$1 |
- mflo $12 |
- mfhi $13 |
-.L_bn_div_words_inner_loop2: |
- sltu $14,$15,$12 |
- seq $24,$9,$13 |
- sltu $1,$9,$13 |
- and $14,$24 |
- sltu $3,$12,$6 |
- or $1,$14 |
- .set noreorder |
- beqz $1,.L_bn_div_words_inner_loop2_done |
- subu $13,$3 |
- subu $12,$6 |
- b .L_bn_div_words_inner_loop2 |
- subu $8,1 |
- .set reorder |
-.L_bn_div_words_inner_loop2_done: |
- |
- subu $4,$15,$12 |
- or $2,$8 |
- srl $3,$4,$25 # $3 contains remainder if anybody wants it |
- srl $6,$25 # restore $6 |
- |
- .set noreorder |
- move $5,$3 |
- jr $31 |
- move $4,$2 |
-.end bn_div_words_internal |
- |
-.align 5 |
-.globl bn_mul_comba8 |
-.ent bn_mul_comba8 |
-bn_mul_comba8: |
- .set noreorder |
- .frame $29,6*4,$31 |
- .mask 0x003f0000,-4 |
- subu $29,6*4 |
- sw $21,5*4($29) |
- sw $20,4*4($29) |
- sw $19,3*4($29) |
- sw $18,2*4($29) |
- sw $17,1*4($29) |
- sw $16,0*4($29) |
- |
- .set reorder |
- lw $12,0($5) # If compiled with -mips3 option on |
- # R5000 box assembler barks on this |
- # 1ine with "should not have mult/div |
- # as last instruction in bb (R10K |
- # bug)" warning. If anybody out there |
- # has a clue about how to circumvent |
- # this do send me a note. |
- # <appro@fy.chalmers.se> |
- |
- lw $8,0($6) |
- lw $13,4($5) |
- lw $14,2*4($5) |
- multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); |
- lw $15,3*4($5) |
- lw $9,4($6) |
- lw $10,2*4($6) |
- lw $11,3*4($6) |
- mflo $2 |
- mfhi $3 |
- |
- lw $16,4*4($5) |
- lw $18,5*4($5) |
- multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); |
- lw $20,6*4($5) |
- lw $5,7*4($5) |
- lw $17,4*4($6) |
- lw $19,5*4($6) |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); |
- addu $7,$25,$1 |
- lw $21,6*4($6) |
- lw $6,7*4($6) |
- sw $2,0($4) # r[0]=c1; |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- sw $3,4($4) # r[1]=c2; |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $3,$2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,2*4($4) # r[2]=c3; |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $7,$3,$25 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,3*4($4) # r[3]=c1; |
- |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,4*4($4) # r[4]=c2; |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $3,$2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,5*4($4) # r[5]=c3; |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $7,$3,$25 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,6*4($4) # r[6]=c1; |
- |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,7*4($4) # r[7]=c2; |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $3,$2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,8*4($4) # r[8]=c3; |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $7,$3,$25 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,9*4($4) # r[9]=c1; |
- |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,10*4($4) # r[10]=c2; |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $3,$2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,11*4($4) # r[11]=c3; |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $7,$3,$25 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,12*4($4) # r[12]=c1; |
- |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,13*4($4) # r[13]=c2; |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sw $7,14*4($4) # r[14]=c3; |
- sw $2,15*4($4) # r[15]=c1; |
- |
- .set noreorder |
- lw $21,5*4($29) |
- lw $20,4*4($29) |
- lw $19,3*4($29) |
- lw $18,2*4($29) |
- lw $17,1*4($29) |
- lw $16,0*4($29) |
- jr $31 |
- addu $29,6*4 |
-.end bn_mul_comba8 |
- |
-.align 5 |
-.globl bn_mul_comba4 |
-.ent bn_mul_comba4 |
-bn_mul_comba4: |
- .set reorder |
- lw $12,0($5) |
- lw $8,0($6) |
- lw $13,4($5) |
- lw $14,2*4($5) |
- multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); |
- lw $15,3*4($5) |
- lw $9,4($6) |
- lw $10,2*4($6) |
- lw $11,3*4($6) |
- mflo $2 |
- mfhi $3 |
- sw $2,0($4) |
- |
- multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); |
- addu $7,$25,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- sw $3,4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $3,$2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,2*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $7,$3,$25 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,3*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $2,$7,$25 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,4*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $3,$2,$25 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,5*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sw $2,6*4($4) |
- sw $3,7*4($4) |
- |
- .set noreorder |
- jr $31 |
- nop |
-.end bn_mul_comba4 |
- |
-.align 5 |
-.globl bn_sqr_comba8 |
-.ent bn_sqr_comba8 |
-bn_sqr_comba8: |
- .set reorder |
- lw $12,0($5) |
- lw $13,4($5) |
- lw $14,2*4($5) |
- lw $15,3*4($5) |
- |
- multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); |
- lw $8,4*4($5) |
- lw $9,5*4($5) |
- lw $10,6*4($5) |
- lw $11,7*4($5) |
- mflo $2 |
- mfhi $3 |
- sw $2,0($4) |
- |
- multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $7,$25,$1 |
- sw $3,4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $3,$25,$0 |
- sll $25,1 |
- multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,2*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $7,$25,$0 |
- sll $25,1 |
- multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $7,$1 |
- multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,3*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $2,$1 |
- multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,4*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $3,$25,$0 |
- sll $25,1 |
- multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $3,$1 |
- multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3); |
- addu $3,$1 |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,5*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $7,$25,$0 |
- sll $25,1 |
- multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $7,$1 |
- multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $7,$1 |
- multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,6*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $2,$1 |
- multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $2,$1 |
- multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $2,$1 |
- multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,7*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $3,$25,$0 |
- sll $25,1 |
- multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $3,$1 |
- multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $3,$1 |
- multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,8*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $7,$25,$0 |
- sll $25,1 |
- multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $7,$1 |
- multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $7,$1 |
- multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,9*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $2,$1 |
- multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,10*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $3,$25,$0 |
- sll $25,1 |
- multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $3,$1 |
- multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,11*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $7,$25,$0 |
- sll $25,1 |
- multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1); |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,12*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,13*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sw $7,14*4($4) |
- sw $2,15*4($4) |
- |
- .set noreorder |
- jr $31 |
- nop |
-.end bn_sqr_comba8 |
- |
-.align 5 |
-.globl bn_sqr_comba4 |
-.ent bn_sqr_comba4 |
-bn_sqr_comba4: |
- .set reorder |
- lw $12,0($5) |
- lw $13,4($5) |
- multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); |
- lw $14,2*4($5) |
- lw $15,3*4($5) |
- mflo $2 |
- mfhi $3 |
- sw $2,0($4) |
- |
- multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $7,$25,$1 |
- sw $3,4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $3,$25,$0 |
- sll $25,1 |
- multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- mflo $24 |
- mfhi $25 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,2*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $7,$25,$0 |
- sll $25,1 |
- multu $13,$14 # mul_add_c(a2[1],b[2],c1,c2,c3); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- mflo $24 |
- mfhi $25 |
- slt $1,$25,$0 |
- addu $7,$1 |
- multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); |
- sll $25,1 |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sltu $1,$3,$25 |
- addu $7,$1 |
- sw $2,3*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $2,$25,$0 |
- sll $25,1 |
- multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- mflo $24 |
- mfhi $25 |
- addu $3,$24 |
- sltu $1,$3,$24 |
- multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); |
- addu $25,$1 |
- addu $7,$25 |
- sltu $1,$7,$25 |
- addu $2,$1 |
- sw $3,4*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- slt $3,$25,$0 |
- sll $25,1 |
- multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); |
- slt $6,$24,$0 |
- addu $25,$6 |
- sll $24,1 |
- addu $7,$24 |
- sltu $1,$7,$24 |
- addu $25,$1 |
- addu $2,$25 |
- sltu $1,$2,$25 |
- addu $3,$1 |
- sw $7,5*4($4) |
- |
- mflo $24 |
- mfhi $25 |
- addu $2,$24 |
- sltu $1,$2,$24 |
- addu $25,$1 |
- addu $3,$25 |
- sw $2,6*4($4) |
- sw $3,7*4($4) |
- |
- .set noreorder |
- jr $31 |
- nop |
-.end bn_sqr_comba4 |