| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env perl | |
| 2 # | |
| 3 # ==================================================================== | |
| 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 5 # project. The module is, however, dual licensed under OpenSSL and | |
| 6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
| 7 # details see http://www.openssl.org/~appro/cryptogams/. | |
| 8 # ==================================================================== | |
| 9 # | |
| 10 # February 2009 | |
| 11 # | |
| 12 # Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to | |
| 13 # "cluster" Address Generation Interlocks, so that one pipeline stall | |
| 14 # resolves several dependencies. | |
| 15 | |
| 16 # November 2010. | |
| 17 # | |
| 18 # Adapt for -m31 build. If kernel supports what's called "highgprs" | |
| 19 # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit | |
| 20 # instructions and achieve "64-bit" performance even in 31-bit legacy | |
| 21 # application context. The feature is not specific to any particular | |
| 22 # processor, as long as it's "z-CPU". Latter implies that the code | |
| 23 # remains z/Architecture specific. On z990 it was measured to perform | |
| 24 # 50% better than code generated by gcc 4.3. | |
| 25 | |
# Command line: <flavour> [other arguments ...] <output-file>
#
# The first argument selects the target flavour.  A flavour matching
# /3[12]/ selects the 31-bit layout: 4-byte register save slots and the
# plain stm/lm instructions.  Anything else (including no argument at
# all) selects the 64-bit layout: 8-byte slots and stmg/lmg.
$flavour = shift;

if ($flavour =~ /3[12]/) {
    $SIZE_T=4;		# bytes per register save slot
    $g="";		# suffix appended to stm/lm
} else {
    $SIZE_T=8;
    $g="g";
}

# Consume arguments until one looks like "name.ext"; that one is taken as
# the output file.  $output ends up false when no argument qualifies.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually named, and stop
# right away if it cannot be created: carrying on would exit successfully
# without having written the requested file.
if ($output) {
    open(STDOUT,">",$output) or die "can't open $output: $!";
}

$rp="%r14";		# register the emitted functions return through
$sp="%r15";		# base register of the register save area
$code=<<___;
.text

___
| 45 | |
# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Appends the RC4 routine to $code.  The emitted code reads the x and y
# indices from bytes 0 and 1 of *key, uses the byte table that starts at
# offset 2, processes len/8 groups of eight bytes in an unrolled loop and
# the remaining len%8 bytes one at a time, then stores x and y back.
{
    # Scratch and argument registers; %r2..%r5 follow the prototype order.
    my ($acc,$cnt,$key,$len,$inp,$out) = map { "%r$_" } (0 .. 5);

    my @XX = ("%r6","%r7");	# current / next x index
    my @TX = ("%r8","%r9");	# table bytes at those x indices
    my $YY = "%r10";		# y index
    my $TY = "%r11";		# table byte at y, then index of the output byte

    $code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stm${g}	%r6,%r11,6*$SIZE_T($sp)
___

    # 31-bit flavours: zero-extend the 32-bit length before 64-bit use.
    if ($flavour =~ /3[12]/) {
	$code.=<<___;
	llgfr	$len,$len
___
    }

    $code.=<<___;
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___

    # Eight unrolled steps.  The output byte of step N is fetched during
    # step N+1 (the last one after the loop), so step 0 has nothing to
    # fetch, step 1 starts the accumulator and later steps shift it first.
    for my $i (0 .. 7) {
	$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___

	if ($i == 1) {
	    $code.=<<___;
	llgc	$acc,2($TY,$key)
___
	}
	elsif ($i > 1) {
	    $code.=<<___;
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
	}

	# Swap the two table bytes; when the next x index equals y, the
	# next step's byte is replaced by the one just stored at y.
	$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___

	# "rotate" registers: with two entries each, that is a swap
	@TX = reverse @TX;
	@XX = reverse @XX;
    }

    $code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lm${g}	%r6,%r11,6*$SIZE_T($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}
| 160 | |
# void private_RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Appends the key-setup routine to $code.  The emitted code clears the
# two index bytes at the start of *key, fills the 256-byte table at
# offset 2 with 0..255, then makes one pass over the table swapping each
# entry with the one selected by a running index that adds in the table
# byte and the next input byte.  The input offset restarts at 0 whenever
# len bytes have been consumed.
{
    # %r0..%r8 in order: loop counter, running index, the three arguments,
    # table byte, input/swap byte, table offset (-256..0), input offset.
    my ($cnt,$idx,$key,$len,$inp,$acc,$dat,$ikey,$iinp) =
	map { "%r$_" } (0 .. 8);

    $code.=<<___;
.globl	private_RC4_set_key
.type	private_RC4_set_key,\@function
.align	64
private_RC4_set_key:
	stm${g}	%r6,%r8,6*$SIZE_T($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
.Ldone:
	lm${g}	%r6,%r8,6*$SIZE_T($sp)
	br	$rp
.size	private_RC4_set_key,.-private_RC4_set_key

___
}
| 217 | |
# const char *RC4_options()
#
# Appends RC4_options to $code: the routine loads the address of the
# read-only string "rc4(8x,char)" into %r2 and returns.  Nothing in this
# text needs interpolation, hence the single-quoted heredoc.
$code .= <<'___';
.globl	RC4_options
.type	RC4_options,@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___
| 232 | |
# Write out the generated assembly.  Output is buffered, so a write error
# (for example a full disk) may only be reported when the handle is
# closed; check both calls so a truncated file is never mistaken for
# success.
print($code) or die "error writing output: $!";
close STDOUT or die "error closing STDOUT: $!";	# force flush
| OLD | NEW |