Index: openssl/crypto/bn/asm/ppc.pl |
=================================================================== |
--- openssl/crypto/bn/asm/ppc.pl (revision 105093) |
+++ openssl/crypto/bn/asm/ppc.pl (working copy) |
@@ -100,9 +100,9 @@ |
# me a note at schari@us.ibm.com |
# |
-$opf = shift; |
+$flavour = shift; |
-if ($opf =~ /32\.s/) { |
+if ($flavour =~ /32/) { |
$BITS= 32; |
$BNSZ= $BITS/8; |
$ISA= "\"ppc\""; |
@@ -125,7 +125,7 @@ |
$INSR= "insrwi"; # insert right |
$ROTL= "rotlwi"; # rotate left by immediate |
$TR= "tw"; # conditional trap |
-} elsif ($opf =~ /64\.s/) { |
+} elsif ($flavour =~ /64/) { |
$BITS= 64; |
$BNSZ= $BITS/8; |
$ISA= "\"ppc64\""; |
@@ -149,93 +149,16 @@ |
$INSR= "insrdi"; # insert right |
$ROTL= "rotldi"; # rotate left by immediate |
$TR= "td"; # conditional trap |
-} else { die "nonsense $opf"; } |
+} else { die "nonsense $flavour"; } |
-( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; |
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;  # $dir = this script's path up to and including the last / or \ |
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or  # first choice: ppc-xlate.pl in the same directory as this script |
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or  # fallback: ../../perlasm/ relative to this script |
+die "can't locate ppc-xlate.pl"; |
-# function entry points from the AIX code |
-# |
-# There are other, more elegant, ways to handle this. We (IBM) chose |
-# this approach as it plays well with scripts we run to 'namespace' |
-# OpenSSL .i.e. we add a prefix to all the public symbols so we can |
-# co-exist in the same process with other implementations of OpenSSL. |
-# 'cleverer' ways of doing these substitutions tend to hide data we |
-# need to be obvious. |
-# |
-my @items = ("bn_sqr_comba4", |
- "bn_sqr_comba8", |
- "bn_mul_comba4", |
- "bn_mul_comba8", |
- "bn_sub_words", |
- "bn_add_words", |
- "bn_div_words", |
- "bn_sqr_words", |
- "bn_mul_words", |
- "bn_mul_add_words"); |
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";  # pipe all output through ppc-xlate.pl; "or" (not "||") so the die guards open() itself |
-if ($opf =~ /linux/) { do_linux(); } |
-elsif ($opf =~ /aix/) { do_aix(); } |
-elsif ($opf =~ /osx/) { do_osx(); } |
-else { do_bsd(); } |
- |
-sub do_linux { |
- $d=&data(); |
- |
- if ($BITS==64) { |
- foreach $t (@items) { |
- $d =~ s/\.$t:/\ |
-\t.section\t".opd","aw"\ |
-\t.align\t3\ |
-\t.globl\t$t\ |
-$t:\ |
-\t.quad\t.$t,.TOC.\@tocbase,0\ |
-\t.size\t$t,24\ |
-\t.previous\n\ |
-\t.type\t.$t,\@function\ |
-\t.globl\t.$t\ |
-.$t:/g; |
- } |
- } |
- else { |
- foreach $t (@items) { |
- $d=~s/\.$t/$t/g; |
- } |
- } |
- # hide internal labels to avoid pollution of name table... |
- $d=~s/Lppcasm_/.Lppcasm_/gm; |
- print $d; |
-} |
- |
-sub do_aix { |
- # AIX assembler is smart enough to please the linker without |
- # making us do something special... |
- print &data(); |
-} |
- |
-# MacOSX 32 bit |
-sub do_osx { |
- $d=&data(); |
- # Change the bn symbol prefix from '.' to '_' |
- foreach $t (@items) { |
- $d=~s/\.$t/_$t/g; |
- } |
- # Change .machine to something OS X asm will accept |
- $d=~s/\.machine.*/.text/g; |
- $d=~s/\#/;/g; # change comment from '#' to ';' |
- print $d; |
-} |
- |
-# BSD (Untested) |
-sub do_bsd { |
- $d=&data(); |
- foreach $t (@items) { |
- $d=~s/\.$t/_$t/g; |
- } |
- print $d; |
-} |
- |
-sub data { |
- local($data)=<<EOF; |
+$data=<<EOF; |
#-------------------------------------------------------------------- |
# |
# |
@@ -297,34 +220,21 @@ |
# |
# Defines to be used in the assembly code. |
# |
-.set r0,0 # we use it as storage for value of 0 |
-.set SP,1 # preserved |
-.set RTOC,2 # preserved |
-.set r3,3 # 1st argument/return value |
-.set r4,4 # 2nd argument/volatile register |
-.set r5,5 # 3rd argument/volatile register |
-.set r6,6 # ... |
-.set r7,7 |
-.set r8,8 |
-.set r9,9 |
-.set r10,10 |
-.set r11,11 |
-.set r12,12 |
-.set r13,13 # not used, nor any other "below" it... |
+#.set r0,0 # we use it as storage for value of 0 |
+#.set SP,1 # preserved |
+#.set RTOC,2 # preserved |
+#.set r3,3 # 1st argument/return value |
+#.set r4,4 # 2nd argument/volatile register |
+#.set r5,5 # 3rd argument/volatile register |
+#.set r6,6 # ... |
+#.set r7,7 |
+#.set r8,8 |
+#.set r9,9 |
+#.set r10,10 |
+#.set r11,11 |
+#.set r12,12 |
+#.set r13,13 # not used, nor any other "below" it... |
-.set BO_IF_NOT,4 |
-.set BO_IF,12 |
-.set BO_dCTR_NZERO,16 |
-.set BO_dCTR_ZERO,18 |
-.set BO_ALWAYS,20 |
-.set CR0_LT,0; |
-.set CR0_GT,1; |
-.set CR0_EQ,2 |
-.set CR1_FX,4; |
-.set CR1_FEX,5; |
-.set CR1_VX,6 |
-.set LR,8 |
- |
# Declare function names to be global |
# NOTE: For gcc these names MUST be changed to remove |
# the first . i.e. for example change ".bn_sqr_comba4" |
@@ -344,7 +254,7 @@ |
# .text section |
- .machine $ISA |
+ .machine "any" |
# |
# NOTE: The following label name should be changed to |
@@ -478,7 +388,7 @@ |
$ST r9,`6*$BNSZ`(r3) #r[6]=c1 |
$ST r10,`7*$BNSZ`(r3) #r[7]=c2 |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
# |
@@ -903,7 +813,7 @@ |
$ST r9, `15*$BNSZ`(r3) #r[15]=c1; |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
@@ -1039,7 +949,7 @@ |
addze r11,r0 |
#mul_add_c(a[3],b[2],c3,c1,c2); |
$LD r6,`3*$BNSZ`(r4) |
- $LD r7,`2*$BNSZ`(r4) |
+ $LD r7,`2*$BNSZ`(r5) |
$UMULL r8,r6,r7 |
$UMULH r9,r6,r7 |
addc r12,r8,r12 |
@@ -1055,7 +965,7 @@ |
$ST r10,`6*$BNSZ`(r3) #r[6]=c1 |
$ST r11,`7*$BNSZ`(r3) #r[7]=c2 |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
# |
@@ -1591,7 +1501,7 @@ |
adde r10,r10,r9 |
$ST r12,`14*$BNSZ`(r3) #r[14]=c3; |
$ST r10,`15*$BNSZ`(r3) #r[15]=c1; |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
# |
@@ -1623,7 +1533,7 @@ |
subfc. r7,r0,r6 # If r6 is 0 then result is 0. |
# if r6 > 0 then result !=0 |
# In either case carry bit is set. |
- bc BO_IF,CR0_EQ,Lppcasm_sub_adios |
+ beq Lppcasm_sub_adios |
addi r4,r4,-$BNSZ |
addi r3,r3,-$BNSZ |
addi r5,r5,-$BNSZ |
@@ -1635,11 +1545,11 @@ |
# if carry = 1 this is r7-r8. Else it |
# is r7-r8 -1 as we need. |
$STU r6,$BNSZ(r3) |
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop |
+ bdnz- Lppcasm_sub_mainloop |
Lppcasm_sub_adios: |
subfze r3,r0 # if carry bit is set then r3 = 0 else -1 |
andi. r3,r3,1 # keep only last bit. |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
@@ -1670,7 +1580,7 @@ |
# check for r6 = 0. Is this needed? |
# |
addic. r6,r6,0 #test r6 and clear carry bit. |
- bc BO_IF,CR0_EQ,Lppcasm_add_adios |
+ beq Lppcasm_add_adios |
addi r4,r4,-$BNSZ |
addi r3,r3,-$BNSZ |
addi r5,r5,-$BNSZ |
@@ -1680,10 +1590,10 @@ |
$LDU r8,$BNSZ(r5) |
adde r8,r7,r8 |
$STU r8,$BNSZ(r3) |
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop |
+ bdnz- Lppcasm_add_mainloop |
Lppcasm_add_adios: |
addze r3,r0 #return carry bit. |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
# |
@@ -1707,24 +1617,24 @@ |
# r5 = d |
$UCMPI 0,r5,0 # compare r5 and 0 |
- bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0 |
+ bne Lppcasm_div1 # proceed if d!=0 |
li r3,-1 # d=0 return -1 |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
Lppcasm_div1: |
xor r0,r0,r0 #r0=0 |
li r8,$BITS |
$CNTLZ. r7,r5 #r7 = num leading 0s in d. |
- bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros |
+ beq Lppcasm_div2 #proceed if no leading zeros |
subf r8,r7,r8 #r8 = BN_num_bits_word(d) |
$SHR. r9,r3,r8 #are there any bits above r8'th? |
$TR 16,r9,r0 #if there're, signal to dump core... |
Lppcasm_div2: |
$UCMP 0,r3,r5 #h>=d? |
- bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not |
+ blt Lppcasm_div3 #goto Lppcasm_div3 if not |
subf r3,r5,r3 #h-=d ; |
Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i |
cmpi 0,0,r7,0 # is (i == 0)? |
- bc BO_IF,CR0_EQ,Lppcasm_div4 |
+ beq Lppcasm_div4 |
$SHL r3,r3,r7 # h = (h<< i) |
$SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) |
$SHL r5,r5,r7 # d<<=i |
@@ -1741,7 +1651,7 @@ |
$SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 |
# compute here for innerloop. |
$UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh |
- bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not |
+ bne Lppcasm_div5 # goto Lppcasm_div5 if not |
li r8,-1 |
$CLRU r8,r8,`$BITS/2` #q = BN_MASK2l |
@@ -1762,9 +1672,9 @@ |
# the following 2 instructions do that |
$SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4) |
or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4) |
- $UCMP 1,r6,r7 # compare (tl <= r7) |
- bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit |
- bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit |
+ $UCMP cr1,r6,r7 # compare (tl <= r7) |
+ bne Lppcasm_divinnerexit |
+ ble cr1,Lppcasm_divinnerexit |
addi r8,r8,-1 #q-- |
subf r12,r9,r12 #th -=dh |
$CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. |
@@ -1773,14 +1683,14 @@ |
Lppcasm_divinnerexit: |
$SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) |
$SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h; |
- $UCMP 1,r4,r11 # compare l and tl |
+ $UCMP cr1,r4,r11 # compare l and tl |
add r12,r12,r10 # th+=t |
- bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 |
+ bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 |
addi r12,r12,1 # th++ |
Lppcasm_div7: |
subf r11,r11,r4 #r11=l-tl |
- $UCMP 1,r3,r12 #compare h and th |
- bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 |
+ $UCMP cr1,r3,r12 #compare h and th |
+ bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 |
addi r8,r8,-1 # q-- |
add r3,r5,r3 # h+=d |
Lppcasm_div8: |
@@ -1791,12 +1701,12 @@ |
# the following 2 instructions will do this. |
$INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. |
$ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 |
- bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ; |
+ bdz Lppcasm_div9 #if (count==0) break ; |
$SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4 |
b Lppcasm_divouterloop |
Lppcasm_div9: |
or r3,r8,r0 |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
# |
@@ -1822,7 +1732,7 @@ |
# No unrolling done here. Not performance critical. |
addic. r5,r5,0 #test r5. |
- bc BO_IF,CR0_EQ,Lppcasm_sqr_adios |
+ beq Lppcasm_sqr_adios |
addi r4,r4,-$BNSZ |
addi r3,r3,-$BNSZ |
mtctr r5 |
@@ -1833,9 +1743,9 @@ |
$UMULH r8,r6,r6 |
$STU r7,$BNSZ(r3) |
$STU r8,$BNSZ(r3) |
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop |
+ bdnz- Lppcasm_sqr_mainloop |
Lppcasm_sqr_adios: |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
@@ -1858,7 +1768,7 @@ |
xor r0,r0,r0 |
xor r12,r12,r12 # used for carry |
rlwinm. r7,r5,30,2,31 # num >> 2 |
- bc BO_IF,CR0_EQ,Lppcasm_mw_REM |
+ beq Lppcasm_mw_REM |
mtctr r7 |
Lppcasm_mw_LOOP: |
#mul(rp[0],ap[0],w,c1); |
@@ -1896,11 +1806,11 @@ |
addi r3,r3,`4*$BNSZ` |
addi r4,r4,`4*$BNSZ` |
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP |
+ bdnz- Lppcasm_mw_LOOP |
Lppcasm_mw_REM: |
andi. r5,r5,0x3 |
- bc BO_IF,CR0_EQ,Lppcasm_mw_OVER |
+ beq Lppcasm_mw_OVER |
#mul(rp[0],ap[0],w,c1); |
$LD r8,`0*$BNSZ`(r4) |
$UMULL r9,r6,r8 |
@@ -1912,7 +1822,7 @@ |
addi r5,r5,-1 |
cmpli 0,0,r5,0 |
- bc BO_IF,CR0_EQ,Lppcasm_mw_OVER |
+ beq Lppcasm_mw_OVER |
#mul(rp[1],ap[1],w,c1); |
@@ -1926,7 +1836,7 @@ |
addi r5,r5,-1 |
cmpli 0,0,r5,0 |
- bc BO_IF,CR0_EQ,Lppcasm_mw_OVER |
+ beq Lppcasm_mw_OVER |
#mul_add(rp[2],ap[2],w,c1); |
$LD r8,`2*$BNSZ`(r4) |
@@ -1939,7 +1849,7 @@ |
Lppcasm_mw_OVER: |
addi r3,r12,0 |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
# |
@@ -1964,7 +1874,7 @@ |
xor r0,r0,r0 #r0 = 0 |
xor r12,r12,r12 #r12 = 0 . used for carry |
rlwinm. r7,r5,30,2,31 # num >> 2 |
- bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover |
+ beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover |
mtctr r7 |
Lppcasm_maw_mainloop: |
#mul_add(rp[0],ap[0],w,c1); |
@@ -2017,11 +1927,11 @@ |
$ST r11,`3*$BNSZ`(r3) |
addi r3,r3,`4*$BNSZ` |
addi r4,r4,`4*$BNSZ` |
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop |
+ bdnz- Lppcasm_maw_mainloop |
Lppcasm_maw_leftover: |
andi. r5,r5,0x3 |
- bc BO_IF,CR0_EQ,Lppcasm_maw_adios |
+ beq Lppcasm_maw_adios |
addi r3,r3,-$BNSZ |
addi r4,r4,-$BNSZ |
#mul_add(rp[0],ap[0],w,c1); |
@@ -2036,7 +1946,7 @@ |
addze r12,r10 |
$ST r9,0(r3) |
- bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios |
+ bdz Lppcasm_maw_adios |
#mul_add(rp[1],ap[1],w,c1); |
$LDU r8,$BNSZ(r4) |
$UMULL r9,r6,r8 |
@@ -2048,7 +1958,7 @@ |
addze r12,r10 |
$ST r9,0(r3) |
- bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios |
+ bdz Lppcasm_maw_adios |
#mul_add(rp[2],ap[2],w,c1); |
$LDU r8,$BNSZ(r4) |
$UMULL r9,r6,r8 |
@@ -2062,19 +1972,10 @@ |
Lppcasm_maw_adios: |
addi r3,r12,0 |
- bclr BO_ALWAYS,CR0_LT |
+ blr |
.long 0x00000000 |
.align 4 |
EOF |
- $data =~ s/\`([^\`]*)\`/eval $1/gem; |
- |
- # if some assembler chokes on some simplified mnemonic, |
- # this is the spot to fix it up, e.g.: |
- # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare |
- $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm; |
- # assembler X doesn't accept li, load immediate value |
- #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm; |
- # assembler Y chokes on apostrophes in comments |
- $data =~ s/'//gm; |
- return($data); |
-} |
+$data =~ s/\`([^\`]*)\`/eval $1/gem;  # replace every `expr` in the template with the value of evaluating expr |
+print $data; |
+close STDOUT or die "error closing STDOUT: $!";  # STDOUT is a pipe: a false close means the flush or the child process failed |