| Index: openssl/crypto/rc4/asm/rc4-s390x.pl
|
| ===================================================================
|
| --- openssl/crypto/rc4/asm/rc4-s390x.pl (revision 0)
|
| +++ openssl/crypto/rc4/asm/rc4-s390x.pl (revision 0)
|
| @@ -0,0 +1,205 @@
|
| +#!/usr/bin/env perl
|
| +#
|
| +# ====================================================================
|
| +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
| +# project. The module is, however, dual licensed under OpenSSL and
|
| +# CRYPTOGAMS licenses depending on where you obtain it. For further
|
| +# details see http://www.openssl.org/~appro/cryptogams/.
|
| +# ====================================================================
|
| +#
|
| +# February 2009
|
| +#
|
| +# Performance is 2x that of gcc 3.4.6-generated code on z10. The coding
|
| +# "secret" is to "cluster" Address Generation Interlocks, so that one
|
| +# pipeline stall resolves several dependencies.
|
| +
|
| +$rp="%r14"; # return-address register; RC4 and RC4_set_key exit with "br $rp"
|
| +$sp="%r15"; # stack pointer; the stmg/lmg register save slots are addressed off it
|
| +$code=<<___; # start of the assembler text that is accumulated and printed at the end
|
| +.text
|
| +
|
| +___
|
| +
|
| +# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
|
| +{ # scope for the register aliases used while emitting RC4
|
| +$acc="%r0"; # keystream accumulator: 8 bytes in .Loop8, one byte in .Loop1
|
| +$cnt="%r1"; # number of whole 8-byte groups (len>>3)
|
| +$key="%r2"; # 1st argument: byte 0 = x, byte 1 = y, S[] bytes start at offset 2
|
| +$len="%r3"; # 2nd argument: byte count; reduced to len&7 for the tail loop
|
| +$inp="%r4"; # 3rd argument: input pointer
|
| +$out="%r5"; # 4th argument: output pointer
|
| +
|
| +@XX=("%r6","%r7"); # current/next x index; the pair swaps roles after every unrolled step
|
| +@TX=("%r8","%r9"); # S[x] for current/next x; rotated together with @XX
|
| +$YY="%r10"; # y index
|
| +$TY="%r11"; # S[y], then (S[x]+S[y])&255 = index of the keystream byte
|
| +
|
| +$code.=<<___; # entry: load state, then pick the 8-byte loop or the tail loop
|
| +.globl RC4
|
| +.type RC4,\@function
|
| +.align 64
|
| +RC4:
|
| + stmg %r6,%r11,48($sp) # save r6-r11; restored by lmg at .Lexit
|
| + llgc $XX[0],0($key) # x
|
| + llgc $YY,1($key) # y
|
| + la $XX[0],1($XX[0]) # x+1 ...
|
| + nill $XX[0],0xff # ... modulo 256
|
| + srlg $cnt,$len,3 # 64-bit count of 8-byte groups
|
| + ltgr $cnt,$cnt # sets the condition tested by jz below
|
| + llgc $TX[0],2($XX[0],$key) # preload S[x]
|
| + jz .Lshort # fewer than 8 bytes: tail loop only
|
| + j .Loop8
|
| +
|
| +.align 64
|
| +.Loop8:
|
| +___
|
| +for ($i=0;$i<8;$i++) { # unroll eight RC4 steps per pass through .Loop8
|
| +$code.=<<___;
|
| + la $YY,0($YY,$TX[0]) # $i
|
| + nill $YY,255 # y = (y + S[x]) & 255
|
| + la $XX[1],1($XX[0])
|
| + nill $XX[1],255 # next x = (x + 1) & 255
|
| +___
|
| +$code.=<<___ if ($i==1); # keystream byte of step 0 starts the accumulator
|
| + llgc $acc,2($TY,$key)
|
| +___
|
| +$code.=<<___ if ($i>1); # later steps shift in the byte produced by the previous step
|
| + sllg $acc,$acc,8
|
| + ic $acc,2($TY,$key)
|
| +___
|
| +$code.=<<___; # swap S[x] and S[y] while fetching S[next x]
|
| + llgc $TY,2($YY,$key) # S[y]
|
| + stc $TX[0],2($YY,$key) # S[y] = old S[x]
|
| + llgc $TX[1],2($XX[1],$key) # S[next x]
|
| + stc $TY,2($XX[0],$key) # S[x] = old S[y]
|
| + cr $XX[1],$YY
|
| + jne .Lcmov$i
|
| + la $TX[1],0($TX[0]) # next x equals y: use the value just stored there
|
| +.Lcmov$i:
|
| + la $TY,0($TY,$TX[0])
|
| + nill $TY,255 # index of the keystream byte for this step
|
| +___
|
| +push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
|
| +}
|
| +
|
| +$code.=<<___; # finish the 8-byte group, then tail loop and exit
|
| + lg $TX[1],0($inp) # 8 input bytes; TX[1] is reloaded by the next step anyway
|
| + sllg $acc,$acc,8
|
| + la $inp,8($inp)
|
| + ic $acc,2($TY,$key) # eighth keystream byte
|
| + xgr $acc,$TX[1]
|
| + stg $acc,0($out)
|
| + la $out,8($out)
|
| + brctg $cnt,.Loop8 # 64-bit count: cnt is derived from a 64-bit size_t
|
| +
|
| +.Lshort:
|
| + lghi $acc,7
|
| + ngr $len,$acc # len &= 7, the bytes left over
|
| + jz .Lexit
|
| + j .Loop1
|
| +
|
| +.align 16
|
| +.Loop1:
|
| + la $YY,0($YY,$TX[0]) # y += S[x]
|
| + nill $YY,255
|
| + llgc $TY,2($YY,$key) # S[y]
|
| + stc $TX[0],2($YY,$key) # S[y] = old S[x]
|
| + stc $TY,2($XX[0],$key) # S[x] = old S[y]
|
| + ar $TY,$TX[0] # S[x] + S[y]
|
| + ahi $XX[0],1 # x + 1
|
| + nill $TY,255 # keystream index
|
| + nill $XX[0],255
|
| + llgc $acc,0($inp) # input byte
|
| + la $inp,1($inp)
|
| + llgc $TY,2($TY,$key) # keystream byte
|
| + llgc $TX[0],2($XX[0],$key) # S[x] for the next step
|
| + xr $acc,$TY
|
| + stc $acc,0($out)
|
| + la $out,1($out)
|
| + brct $len,.Loop1 # len is 1..7 here, so a 32-bit count suffices
|
| +
|
| +.Lexit:
|
| + ahi $XX[0],-1 # undo the pre-increment before storing x
|
| + stc $XX[0],0($key)
|
| + stc $YY,1($key)
|
| + lmg %r6,%r11,48($sp)
|
| + br $rp
|
| +.size RC4,.-RC4
|
| +.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
|
| +
|
| +___
|
| +}
|
| +
|
| +# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
|
| +{ # scope for the register aliases used while emitting RC4_set_key
|
| +$cnt="%r0"; # loop counter: 256 in the first loop, key bytes left in the second
|
| +$idx="%r1"; # fill index in the first loop, swap index j in the second
|
| +$key="%r2"; # 1st argument: structure being initialised
|
| +$len="%r3"; # 2nd argument: key length in bytes
|
| +$inp="%r4"; # 3rd argument: key bytes
|
| +$acc="%r5"; # S[i]
|
| +$dat="%r6"; # key byte, then S[j]
|
| +$ikey="%r7"; # i biased by -256, so its low byte is zero again after 256 rounds
|
| +$iinp="%r8"; # offset of the current key byte within inp
|
| +
|
| +$code.=<<___; # whole function: identity fill, then 256 key-dependent swaps
|
| +.globl RC4_set_key
|
| +.type RC4_set_key,\@function
|
| +.align 64
|
| +RC4_set_key:
|
| + stmg %r6,%r8,48($sp) # save r6-r8; restored by lmg at .Ldone
|
| + lhi $cnt,256
|
| + la $idx,0(%r0) # idx = 0
|
| + sth $idx,0($key) # zero bytes 0 and 1, which RC4 loads as x and y
|
| +.align 4
|
| +.L1stloop:
|
| + stc $idx,2($idx,$key) # S[i] = i
|
| + la $idx,1($idx)
|
| + brct $cnt,.L1stloop
|
| +
|
| + lghi $ikey,-256
|
| + lr $cnt,$len # key bytes left before wrapping to the start of inp
|
| + la $iinp,0(%r0)
|
| + la $idx,0(%r0) # j = 0
|
| +.align 16
|
| +.L2ndloop:
|
| + llgc $acc,2+256($ikey,$key) # S[i], with i = ikey+256
|
| + llgc $dat,0($iinp,$inp) # key byte
|
| + la $idx,0($idx,$acc) # j += S[i]
|
| + la $ikey,1($ikey) # i++
|
| + la $idx,0($idx,$dat) # j += key byte
|
| + nill $idx,255 # j &= 255
|
| + la $iinp,1($iinp)
|
| + tml $ikey,255 # low byte of i zero means all 256 rounds are done; tested by jz below
|
| + llgc $dat,2($idx,$key) # S[j]
|
| + stc $dat,2+256-1($ikey,$key) # S[i] = S[j]; the -1 compensates for i already incremented
|
| + stc $acc,2($idx,$key) # S[j] = old S[i]
|
| + jz .Ldone
|
| + brct $cnt,.L2ndloop
|
| + lr $cnt,$len # key exhausted: start over at its first byte
|
| + la $iinp,0(%r0)
|
| + j .L2ndloop
|
| +.Ldone:
|
| + lmg %r6,%r8,48($sp)
|
| + br $rp
|
| +.size RC4_set_key,.-RC4_set_key
|
| +
|
| +___
|
| +}
|
| +
|
| +# const char *RC4_options()
|
| +$code.=<<___; # emits RC4_options plus the constant string it returns
|
| +.globl RC4_options
|
| +.type RC4_options,\@function
|
| +.align 16
|
| +RC4_options:
|
| + larl %r2,.Loptions # return value: address of the options string
|
| + br %r14
|
| +.size RC4_options,.-RC4_options
|
| +.section .rodata
|
| +.align 8
|
| +.Loptions: # defined after .align so it can never name padding bytes
|
| +.string "rc4(8x,char)"
|
| +___
|
| +
|
| +print $code; # write the generated assembler text to standard output
|
|
|