OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env perl |
| 2 # |
| 3 # ==================================================================== |
| 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
| 5 # project. The module is, however, dual licensed under OpenSSL and |
| 6 # CRYPTOGAMS licenses depending on where you obtain it. For further |
| 7 # details see http://www.openssl.org/~appro/cryptogams/. |
| 8 # ==================================================================== |
| 9 # |
| 10 # February 2009 |
| 11 # |
| 12 # Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to |
| 13 # "cluster" Address Generation Interlocks, so that one pipeline stall |
| 14 # resolves several dependencies. |
| 15 |
# $rp is the register the generated routines return through ("br $rp");
# $sp is the base register of the area where they save and reload the
# registers they clobber.
($rp,$sp)=("%r14","%r15");

# All generated assembly is accumulated in $code, starting in the text
# section, and is printed in one go at the end of the script.
$code=".text\n\n";
| 22 |
# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Emits the RC4 stream routine.  As addressed by the code below, the key
# schedule holds x at byte 0, y at byte 1 and the 256 one-byte state entries
# S[] from byte 2 onwards.  x is pre-incremented on entry and decremented
# again before it is stored back on exit.
#
# The bulk of the input is handled 8 bytes at a time by .Loop8, which is
# produced by unrolling one RC4 round eight times; the remaining len%8 bytes
# go through the byte-at-a-time .Loop1.
{
# Register names are lexical to this block so they cannot leak into (or be
# confused with) the different allocation used by RC4_set_key.
my $acc="%r0";		# key-stream bytes collected for the current chunk
my $cnt="%r1";		# number of 8-byte chunks left (64-bit)
my $key="%r2";
my $len="%r3";
my $inp="%r4";
my $out="%r5";

my @XX=("%r6","%r7");	# x and x+1; the two swap roles after every round
my @TX=("%r8","%r9");	# S[x] and S[x+1]; rotated together with @XX
my $YY="%r10";		# y
my $TY="%r11";		# S[y], then the index of the key-stream byte

# Prologue: save %r6-%r11, load x and y, advance x, preload S[x] and pick
# the 8-byte or the byte-wise path depending on len>>3.
$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stmg	%r6,%r11,48($sp)
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___
for my $i (0..7) {
# y += S[x] (mod 256) and compute the next x.
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
# The key-stream byte of the previous round is fetched one round late:
# round 1 starts the accumulator, later rounds shift it and insert a byte.
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# Swap S[x] and S[y] and preload S[x+1] for the next round.  If x+1 equals
# y, S[x+1] is the byte that was just stored to S[y], i.e. the old S[x], so
# $TX[1] is set from $TX[0] explicitly.  $TY then becomes the index
# (S[x]+S[y]) mod 256 of this round's key-stream byte.
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}

# End of a chunk: insert the eighth key-stream byte, XOR the 8 collected
# bytes with 8 input bytes and store the result.
#
# $cnt is a full 64-bit count (it comes from srlg/ltgr on a size_t length),
# so the loop is closed with the 64-bit branch-on-count brctg.  The 32-bit
# brct would only decrement the low word and miscount lengths of 2^35 bytes
# and more.
#
# .Loop1 handles the remaining len&7 bytes; that count fits in the low word,
# so brct is sufficient there.  .Lexit undoes the pre-increment of x, stores
# x and y back into the key schedule and restores %r6-%r11.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lmg	%r6,%r11,48($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}
| 132 |
# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Emits the key-schedule routine.  It clears the two bytes at the start of
# the key structure, fills the 256 state bytes that follow with 0..255 and
# then mixes in the len key bytes at inp, reusing them cyclically.
{
# Registers %r0..%r8 in order; by the signature above %r2, %r3 and %r4 are
# the arguments key, len and inp.
($cnt,$idx,$key,$len,$inp,$acc,$dat,$ikey,$iinp)=map("%r$_",0..8);

# Entry: save %r6-%r8, store a zero halfword at key+0 and write S[i]=i for
# i=0..255 (the state starts at key+2).
$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stmg	%r6,%r8,48($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

___

# Mixing pass.  $ikey runs from -256 up to 0, so S[i] is addressed as
# 2+256($ikey,$key) and "tml $ikey,255" reports the end once the low byte of
# the already incremented $ikey is zero; the stores that follow do not touch
# the condition code tested by jz.  Each step does
#   idx = (idx + S[i] + inp[iinp]) & 255 and swaps S[i] with S[idx]
# (the "-1" in the first stc compensates for $ikey having been incremented).
# $cnt counts the key bytes left; when it runs out it is reloaded from $len
# and $iinp restarts at 0.
$code.=<<___;
	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
___

# Exit: restore %r6-%r8 and return.
$code.=<<___;
.Ldone:
	lmg	%r6,%r8,48($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}
| 189 |
# const char *RC4_options()
#
# Emits a routine that returns (in %r2) the address of the constant string
# "rc4(8x,char)", which is placed in .rodata.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___

# Write out everything that was generated.  STDOUT is buffered, so a failed
# write (full disk, closed pipe) may only be reported when the handle is
# closed; check both calls so that truncated assembly is never left behind
# with a zero exit status.
print STDOUT $code or die "error writing to STDOUT: $!";
close STDOUT or die "error closing STDOUT: $!";
OLD | NEW |