Index: openssl/crypto/rc4/asm/rc4-parisc.pl |
diff --git a/openssl/crypto/rc4/asm/rc4-parisc.pl b/openssl/crypto/rc4/asm/rc4-parisc.pl |
deleted file mode 100644 |
index 9165067080eff92155ddb6a404f547268fb774fb..0000000000000000000000000000000000000000 |
--- a/openssl/crypto/rc4/asm/rc4-parisc.pl |
+++ /dev/null |
@@ -1,313 +0,0 @@ |
-#!/usr/bin/env perl |
- |
-# ==================================================================== |
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
-# project. The module is, however, dual licensed under OpenSSL and |
-# CRYPTOGAMS licenses depending on where you obtain it. For further |
-# details see http://www.openssl.org/~appro/cryptogams/. |
-# ==================================================================== |
- |
-# RC4 for PA-RISC. |
- |
-# June 2009. |
-# |
-# Performance is 33% better than gcc 3.2 generated code on PA-7100LC. |
-# For reference, [4x] unrolled loop is >40% faster than folded one. |
-# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement |
-# is believed to be not sufficient to justify the effort... |
-# |
-# Special thanks to polarhome.com for providing HP-UX account. |
- |
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
- |
-$flavour = shift; |
-$output = shift; |
-open STDOUT,">$output"; |
- |
-if ($flavour =~ /64/) { |
- $LEVEL ="2.0W"; |
- $SIZE_T =8; |
- $FRAME_MARKER =80; |
- $SAVED_RP =16; |
- $PUSH ="std"; |
- $PUSHMA ="std,ma"; |
- $POP ="ldd"; |
- $POPMB ="ldd,mb"; |
-} else { |
- $LEVEL ="1.0"; |
- $SIZE_T =4; |
- $FRAME_MARKER =48; |
- $SAVED_RP =20; |
- $PUSH ="stw"; |
- $PUSHMA ="stwm"; |
- $POP ="ldw"; |
- $POPMB ="ldwm"; |
-} |
- |
-$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker |
- # [+ argument transfer] |
-$SZ=1; # defaults to RC4_CHAR |
-if (open CONF,"<${dir}../../opensslconf.h") { |
- while(<CONF>) { |
- if (m/#\s*define\s+RC4_INT\s+(.*)/) { |
- $SZ = ($1=~/char$/) ? 1 : 4; |
- last; |
- } |
- } |
- close CONF; |
-} |
- |
-if ($SZ==1) { # RC4_CHAR |
- $LD="ldb"; |
- $LDX="ldbx"; |
- $MKX="addl"; |
- $ST="stb"; |
-} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC) |
- $LD="ldw"; |
- $LDX="ldwx,s"; |
- $MKX="sh2addl"; |
- $ST="stw"; |
-} |
- |
-$key="%r26"; |
-$len="%r25"; |
-$inp="%r24"; |
-$out="%r23"; |
- |
-@XX=("%r19","%r20"); |
-@TX=("%r21","%r22"); |
-$YY="%r28"; |
-$TY="%r29"; |
- |
-$acc="%r1"; |
-$ix="%r2"; |
-$iy="%r3"; |
-$dat0="%r4"; |
-$dat1="%r5"; |
-$rem="%r6"; |
-$mask="%r31"; |
- |
-sub unrolledloopbody { |
-for ($i=0;$i<4;$i++) { |
-$code.=<<___; |
- ldo 1($XX[0]),$XX[1] |
- `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)` |
- and $mask,$XX[1],$XX[1] |
- $LDX $YY($key),$TY |
- $MKX $YY,$key,$ix |
- $LDX $XX[1]($key),$TX[1] |
- $MKX $XX[0],$key,$iy |
- $ST $TX[0],0($ix) |
- comclr,<> $XX[1],$YY,%r0 ; conditional |
- copy $TX[0],$TX[1] ; move |
- `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)` |
- $ST $TY,0($iy) |
- addl $TX[0],$TY,$TY |
- addl $TX[1],$YY,$YY |
- and $mask,$TY,$TY |
- and $mask,$YY,$YY |
-___ |
-push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers |
-} } |
- |
-sub foldedloop { |
-my ($label,$count)=@_; |
-$code.=<<___; |
-$label |
- $MKX $YY,$key,$iy |
- $LDX $YY($key),$TY |
- $MKX $XX[0],$key,$ix |
- $ST $TX[0],0($iy) |
- ldo 1($XX[0]),$XX[0] |
- $ST $TY,0($ix) |
- addl $TX[0],$TY,$TY |
- ldbx $inp($out),$dat1 |
- and $mask,$TY,$TY |
- and $mask,$XX[0],$XX[0] |
- $LDX $TY($key),$acc |
- $LDX $XX[0]($key),$TX[0] |
- ldo 1($out),$out |
- xor $dat1,$acc,$acc |
- addl $TX[0],$YY,$YY |
- stb $acc,-1($out) |
- addib,<> -1,$count,$label ; $count is always small |
- and $mask,$YY,$YY |
-___ |
-} |
- |
-$code=<<___; |
- .LEVEL $LEVEL |
- .SPACE \$TEXT\$ |
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY |
- |
- .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR |
-RC4 |
- .PROC |
- .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6 |
- .ENTRY |
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue |
- $PUSHMA %r3,$FRAME(%sp) |
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) |
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) |
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) |
- |
- cmpib,*= 0,$len,L\$abort |
- sub $inp,$out,$inp ; distance between $inp and $out |
- |
- $LD `0*$SZ`($key),$XX[0] |
- $LD `1*$SZ`($key),$YY |
- ldo `2*$SZ`($key),$key |
- |
- ldi 0xff,$mask |
- ldi 3,$dat0 |
- |
- ldo 1($XX[0]),$XX[0] ; warm up loop |
- and $mask,$XX[0],$XX[0] |
- $LDX $XX[0]($key),$TX[0] |
- addl $TX[0],$YY,$YY |
- cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother? |
- and $mask,$YY,$YY |
- |
- and,<> $out,$dat0,$rem ; is $out aligned? |
- b L\$alignedout |
- subi 4,$rem,$rem |
- sub $len,$rem,$len |
-___ |
-&foldedloop("L\$alignout",$rem); # process till $out is aligned |
- |
-$code.=<<___; |
-L\$alignedout ; $len is at least 4 here |
- and,<> $inp,$dat0,$acc ; is $inp aligned? |
- b L\$oop4 |
- sub $inp,$acc,$rem ; align $inp |
- |
- sh3addl $acc,%r0,$acc |
- subi 32,$acc,$acc |
- mtctl $acc,%cr11 ; load %sar with vshd align factor |
- ldwx $rem($out),$dat0 |
- ldo 4($rem),$rem |
-L\$oop4misalignedinp |
-___ |
-&unrolledloopbody(); |
-$code.=<<___; |
- $LDX $TY($key),$ix |
- ldwx $rem($out),$dat1 |
- ldo -4($len),$len |
- or $ix,$acc,$acc ; last piece, no need to dep |
- vshd $dat0,$dat1,$iy ; align data |
- copy $dat1,$dat0 |
- xor $iy,$acc,$acc |
- stw $acc,0($out) |
- cmpib,*<< 3,$len,L\$oop4misalignedinp |
- ldo 4($out),$out |
- cmpib,*= 0,$len,L\$done |
- nop |
- b L\$oop1 |
- nop |
- |
- .ALIGN 8 |
-L\$oop4 |
-___ |
-&unrolledloopbody(); |
-$code.=<<___; |
- $LDX $TY($key),$ix |
- ldwx $inp($out),$dat0 |
- ldo -4($len),$len |
- or $ix,$acc,$acc ; last piece, no need to dep |
- xor $dat0,$acc,$acc |
- stw $acc,0($out) |
- cmpib,*<< 3,$len,L\$oop4 |
- ldo 4($out),$out |
- cmpib,*= 0,$len,L\$done |
- nop |
-___ |
-&foldedloop("L\$oop1",$len); |
-$code.=<<___; |
-L\$done |
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 |
- ldo -1($XX[0]),$XX[0] ; chill out loop |
- sub $YY,$TX[0],$YY |
- and $mask,$XX[0],$XX[0] |
- and $mask,$YY,$YY |
- $ST $XX[0],`-2*$SZ`($key) |
- $ST $YY,`-1*$SZ`($key) |
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 |
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 |
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 |
-L\$abort |
- bv (%r2) |
- .EXIT |
- $POPMB -$FRAME(%sp),%r3 |
- .PROCEND |
-___ |
- |
-$code.=<<___; |
- |
- .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR |
- .ALIGN 8 |
-private_RC4_set_key |
- .PROC |
- .CALLINFO NO_CALLS |
- .ENTRY |
- $ST %r0,`0*$SZ`($key) |
- $ST %r0,`1*$SZ`($key) |
- ldo `2*$SZ`($key),$key |
- copy %r0,@XX[0] |
-L\$1st |
- $ST @XX[0],0($key) |
- ldo 1(@XX[0]),@XX[0] |
- bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256 |
- ldo $SZ($key),$key |
- |
- ldo `-256*$SZ`($key),$key ; rewind $key |
- addl $len,$inp,$inp ; $inp to point at the end |
- sub %r0,$len,%r23 ; inverse index |
- copy %r0,@XX[0] |
- copy %r0,@XX[1] |
- ldi 0xff,$mask |
- |
-L\$2nd |
- $LDX @XX[0]($key),@TX[0] |
- ldbx %r23($inp),@TX[1] |
- addi,nuv 1,%r23,%r23 ; increment and conditional |
- sub %r0,$len,%r23 ; inverse index |
- addl @TX[0],@XX[1],@XX[1] |
- addl @TX[1],@XX[1],@XX[1] |
- and $mask,@XX[1],@XX[1] |
- $MKX @XX[0],$key,$TY |
- $LDX @XX[1]($key),@TX[1] |
- $MKX @XX[1],$key,$YY |
- ldo 1(@XX[0]),@XX[0] |
- $ST @TX[0],0($YY) |
- bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256 |
- $ST @TX[1],0($TY) |
- |
- bv,n (%r2) |
- .EXIT |
- nop |
- .PROCEND |
- |
- .EXPORT RC4_options,ENTRY |
- .ALIGN 8 |
-RC4_options |
- .PROC |
- .CALLINFO NO_CALLS |
- .ENTRY |
- blr %r0,%r28 |
- ldi 3,%r1 |
-L\$pic |
- andcm %r28,%r1,%r28 |
- bv (%r2) |
- .EXIT |
- ldo L\$opts-L\$pic(%r28),%r28 |
- .PROCEND |
- .ALIGN 8 |
-L\$opts |
- .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)" |
- .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" |
-___ |
-$code =~ s/\`([^\`]*)\`/eval $1/gem; |
-$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); |
- |
-print $code; |
-close STDOUT; |