| Index: openssl/crypto/rc4/asm/rc4-parisc.pl
|
| diff --git a/openssl/crypto/rc4/asm/rc4-parisc.pl b/openssl/crypto/rc4/asm/rc4-parisc.pl
|
| deleted file mode 100644
|
| index 9165067080eff92155ddb6a404f547268fb774fb..0000000000000000000000000000000000000000
|
| --- a/openssl/crypto/rc4/asm/rc4-parisc.pl
|
| +++ /dev/null
|
| @@ -1,313 +0,0 @@
|
| -#!/usr/bin/env perl
|
| -
|
| -# ====================================================================
|
| -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
| -# project. The module is, however, dual licensed under OpenSSL and
|
| -# CRYPTOGAMS licenses depending on where you obtain it. For further
|
| -# details see http://www.openssl.org/~appro/cryptogams/.
|
| -# ====================================================================
|
| -
|
| -# RC4 for PA-RISC.
|
| -
|
| -# June 2009.
|
| -#
|
| -# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
|
| -# For reference, [4x] unrolled loop is >40% faster than folded one.
|
| -# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
|
| -# is believed to be not sufficient to justify the effort...
|
| -#
|
| -# Special thanks to polarhome.com for providing HP-UX account.
|
| -
|
| -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
| -
|
| -$flavour = shift;
|
| -$output = shift;
|
| -open STDOUT,">$output";
|
| -
|
| -if ($flavour =~ /64/) {
|
| - $LEVEL ="2.0W";
|
| - $SIZE_T =8;
|
| - $FRAME_MARKER =80;
|
| - $SAVED_RP =16;
|
| - $PUSH ="std";
|
| - $PUSHMA ="std,ma";
|
| - $POP ="ldd";
|
| - $POPMB ="ldd,mb";
|
| -} else {
|
| - $LEVEL ="1.0";
|
| - $SIZE_T =4;
|
| - $FRAME_MARKER =48;
|
| - $SAVED_RP =20;
|
| - $PUSH ="stw";
|
| - $PUSHMA ="stwm";
|
| - $POP ="ldw";
|
| - $POPMB ="ldwm";
|
| -}
|
| -
|
| -$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
|
| - # [+ argument transfer]
|
| -$SZ=1; # defaults to RC4_CHAR
|
| -if (open CONF,"<${dir}../../opensslconf.h") {
|
| - while(<CONF>) {
|
| - if (m/#\s*define\s+RC4_INT\s+(.*)/) {
|
| - $SZ = ($1=~/char$/) ? 1 : 4;
|
| - last;
|
| - }
|
| - }
|
| - close CONF;
|
| -}
|
| -
|
| -if ($SZ==1) { # RC4_CHAR
|
| - $LD="ldb";
|
| - $LDX="ldbx";
|
| - $MKX="addl";
|
| - $ST="stb";
|
| -} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
|
| - $LD="ldw";
|
| - $LDX="ldwx,s";
|
| - $MKX="sh2addl";
|
| - $ST="stw";
|
| -}
|
| -
|
| -$key="%r26";
|
| -$len="%r25";
|
| -$inp="%r24";
|
| -$out="%r23";
|
| -
|
| -@XX=("%r19","%r20");
|
| -@TX=("%r21","%r22");
|
| -$YY="%r28";
|
| -$TY="%r29";
|
| -
|
| -$acc="%r1";
|
| -$ix="%r2";
|
| -$iy="%r3";
|
| -$dat0="%r4";
|
| -$dat1="%r5";
|
| -$rem="%r6";
|
| -$mask="%r31";
|
| -
|
| -sub unrolledloopbody {
|
| -for ($i=0;$i<4;$i++) {
|
| -$code.=<<___;
|
| - ldo 1($XX[0]),$XX[1]
|
| - `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
|
| - and $mask,$XX[1],$XX[1]
|
| - $LDX $YY($key),$TY
|
| - $MKX $YY,$key,$ix
|
| - $LDX $XX[1]($key),$TX[1]
|
| - $MKX $XX[0],$key,$iy
|
| - $ST $TX[0],0($ix)
|
| - comclr,<> $XX[1],$YY,%r0 ; conditional
|
| - copy $TX[0],$TX[1] ; move
|
| - `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
|
| - $ST $TY,0($iy)
|
| - addl $TX[0],$TY,$TY
|
| - addl $TX[1],$YY,$YY
|
| - and $mask,$TY,$TY
|
| - and $mask,$YY,$YY
|
| -___
|
| -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
|
| -} }
|
| -
|
| -sub foldedloop {
|
| -my ($label,$count)=@_;
|
| -$code.=<<___;
|
| -$label
|
| - $MKX $YY,$key,$iy
|
| - $LDX $YY($key),$TY
|
| - $MKX $XX[0],$key,$ix
|
| - $ST $TX[0],0($iy)
|
| - ldo 1($XX[0]),$XX[0]
|
| - $ST $TY,0($ix)
|
| - addl $TX[0],$TY,$TY
|
| - ldbx $inp($out),$dat1
|
| - and $mask,$TY,$TY
|
| - and $mask,$XX[0],$XX[0]
|
| - $LDX $TY($key),$acc
|
| - $LDX $XX[0]($key),$TX[0]
|
| - ldo 1($out),$out
|
| - xor $dat1,$acc,$acc
|
| - addl $TX[0],$YY,$YY
|
| - stb $acc,-1($out)
|
| - addib,<> -1,$count,$label ; $count is always small
|
| - and $mask,$YY,$YY
|
| -___
|
| -}
|
| -
|
| -$code=<<___;
|
| - .LEVEL $LEVEL
|
| - .SPACE \$TEXT\$
|
| - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
|
| -
|
| - .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
|
| -RC4
|
| - .PROC
|
| - .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
|
| - .ENTRY
|
| - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
|
| - $PUSHMA %r3,$FRAME(%sp)
|
| - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
|
| - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
|
| - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
|
| -
|
| - cmpib,*= 0,$len,L\$abort
|
| - sub $inp,$out,$inp ; distance between $inp and $out
|
| -
|
| - $LD `0*$SZ`($key),$XX[0]
|
| - $LD `1*$SZ`($key),$YY
|
| - ldo `2*$SZ`($key),$key
|
| -
|
| - ldi 0xff,$mask
|
| - ldi 3,$dat0
|
| -
|
| - ldo 1($XX[0]),$XX[0] ; warm up loop
|
| - and $mask,$XX[0],$XX[0]
|
| - $LDX $XX[0]($key),$TX[0]
|
| - addl $TX[0],$YY,$YY
|
| - cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
|
| - and $mask,$YY,$YY
|
| -
|
| - and,<> $out,$dat0,$rem ; is $out aligned?
|
| - b L\$alignedout
|
| - subi 4,$rem,$rem
|
| - sub $len,$rem,$len
|
| -___
|
| -&foldedloop("L\$alignout",$rem); # process till $out is aligned
|
| -
|
| -$code.=<<___;
|
| -L\$alignedout ; $len is at least 4 here
|
| - and,<> $inp,$dat0,$acc ; is $inp aligned?
|
| - b L\$oop4
|
| - sub $inp,$acc,$rem ; align $inp
|
| -
|
| - sh3addl $acc,%r0,$acc
|
| - subi 32,$acc,$acc
|
| - mtctl $acc,%cr11 ; load %sar with vshd align factor
|
| - ldwx $rem($out),$dat0
|
| - ldo 4($rem),$rem
|
| -L\$oop4misalignedinp
|
| -___
|
| -&unrolledloopbody();
|
| -$code.=<<___;
|
| - $LDX $TY($key),$ix
|
| - ldwx $rem($out),$dat1
|
| - ldo -4($len),$len
|
| - or $ix,$acc,$acc ; last piece, no need to dep
|
| - vshd $dat0,$dat1,$iy ; align data
|
| - copy $dat1,$dat0
|
| - xor $iy,$acc,$acc
|
| - stw $acc,0($out)
|
| - cmpib,*<< 3,$len,L\$oop4misalignedinp
|
| - ldo 4($out),$out
|
| - cmpib,*= 0,$len,L\$done
|
| - nop
|
| - b L\$oop1
|
| - nop
|
| -
|
| - .ALIGN 8
|
| -L\$oop4
|
| -___
|
| -&unrolledloopbody();
|
| -$code.=<<___;
|
| - $LDX $TY($key),$ix
|
| - ldwx $inp($out),$dat0
|
| - ldo -4($len),$len
|
| - or $ix,$acc,$acc ; last piece, no need to dep
|
| - xor $dat0,$acc,$acc
|
| - stw $acc,0($out)
|
| - cmpib,*<< 3,$len,L\$oop4
|
| - ldo 4($out),$out
|
| - cmpib,*= 0,$len,L\$done
|
| - nop
|
| -___
|
| -&foldedloop("L\$oop1",$len);
|
| -$code.=<<___;
|
| -L\$done
|
| - $POP `-$FRAME-$SAVED_RP`(%sp),%r2
|
| - ldo -1($XX[0]),$XX[0] ; chill out loop
|
| - sub $YY,$TX[0],$YY
|
| - and $mask,$XX[0],$XX[0]
|
| - and $mask,$YY,$YY
|
| - $ST $XX[0],`-2*$SZ`($key)
|
| - $ST $YY,`-1*$SZ`($key)
|
| - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
|
| - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
|
| - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
|
| -L\$abort
|
| - bv (%r2)
|
| - .EXIT
|
| - $POPMB -$FRAME(%sp),%r3
|
| - .PROCEND
|
| -___
|
| -
|
| -$code.=<<___;
|
| -
|
| - .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
|
| - .ALIGN 8
|
| -private_RC4_set_key
|
| - .PROC
|
| - .CALLINFO NO_CALLS
|
| - .ENTRY
|
| - $ST %r0,`0*$SZ`($key)
|
| - $ST %r0,`1*$SZ`($key)
|
| - ldo `2*$SZ`($key),$key
|
| - copy %r0,@XX[0]
|
| -L\$1st
|
| - $ST @XX[0],0($key)
|
| - ldo 1(@XX[0]),@XX[0]
|
| - bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
|
| - ldo $SZ($key),$key
|
| -
|
| - ldo `-256*$SZ`($key),$key ; rewind $key
|
| - addl $len,$inp,$inp ; $inp to point at the end
|
| - sub %r0,$len,%r23 ; inverse index
|
| - copy %r0,@XX[0]
|
| - copy %r0,@XX[1]
|
| - ldi 0xff,$mask
|
| -
|
| -L\$2nd
|
| - $LDX @XX[0]($key),@TX[0]
|
| - ldbx %r23($inp),@TX[1]
|
| - addi,nuv 1,%r23,%r23 ; increment and conditional
|
| - sub %r0,$len,%r23 ; inverse index
|
| - addl @TX[0],@XX[1],@XX[1]
|
| - addl @TX[1],@XX[1],@XX[1]
|
| - and $mask,@XX[1],@XX[1]
|
| - $MKX @XX[0],$key,$TY
|
| - $LDX @XX[1]($key),@TX[1]
|
| - $MKX @XX[1],$key,$YY
|
| - ldo 1(@XX[0]),@XX[0]
|
| - $ST @TX[0],0($YY)
|
| - bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
|
| - $ST @TX[1],0($TY)
|
| -
|
| - bv,n (%r2)
|
| - .EXIT
|
| - nop
|
| - .PROCEND
|
| -
|
| - .EXPORT RC4_options,ENTRY
|
| - .ALIGN 8
|
| -RC4_options
|
| - .PROC
|
| - .CALLINFO NO_CALLS
|
| - .ENTRY
|
| - blr %r0,%r28
|
| - ldi 3,%r1
|
| -L\$pic
|
| - andcm %r28,%r1,%r28
|
| - bv (%r2)
|
| - .EXIT
|
| - ldo L\$opts-L\$pic(%r28),%r28
|
| - .PROCEND
|
| - .ALIGN 8
|
| -L\$opts
|
| - .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
|
| - .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
|
| -___
|
| -$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
| -$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
|
| -
|
| -print $code;
|
| -close STDOUT;
|
|
|