| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env perl | |
| 2 | |
| 3 # ==================================================================== | |
| 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 5 # project. The module is, however, dual licensed under OpenSSL and | |
| 6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
| 7 # details see http://www.openssl.org/~appro/cryptogams/. | |
| 8 # ==================================================================== | |
| 9 | |
| 10 # SHA1 block procedure for Alpha. | |
| 11 | |
| 12 # On 21264 performance is 33% better than code generated by vendor | |
| 13 # compiler, and 75% better than GCC [3.4], and in absolute terms is | |
| 14 # 8.7 cycles per processed byte. Implementation features vectorized | |
| 15 # byte swap, but not Xupdate. | |
| 16 | |
# Register allocation for the generated Alpha assembly.  Every value below is
# a register name that is interpolated into the heredoc templates further down.
#   @X        - sixteen numbered registers $0..$15, used as @X[n] operands:
#               the input words are loaded into them and the "Xupdate" steps
#               rewrite them in place (indices are taken modulo 16).
#   $ctx      - base address of the five 32-bit state words (offsets 0..16).
#   $inp/$num - input pointer and block count; the entry code turns $num into
#               an end pointer ($inp + $num*64).
#   $A..$E    - working variables; @V is the list the round loops rotate.
#   $t0..$t3  - scratch registers.  $t2 is "ra", which is why the prologue
#               saves ra and the epilogue reloads it before returning.
#   $K        - current round constant, kept in "AT" (the emitted code
#               switches the assembler to ".set noat").
# NOTE(review): the trailing "# $16", "# 20", "# 24", "# 28" remarks presumably
# give the hardware register number of that line's register - confirm against
# the Alpha calling convention.
| 17 @X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", | |
| 18 "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15"); | |
| 19 $ctx="a0"; # $16 | |
| 20 $inp="a1"; | |
| 21 $num="a2"; | |
| 22 $A="a3"; | |
| 23 $B="a4"; # 20 | |
| 24 $C="a5"; | |
| 25 $D="t8"; | |
| 26 $E="t9"; @V=($A,$B,$C,$D,$E); | |
| 27 $t0="t10"; # 24 | |
| 28 $t1="t11"; | |
| 29 $t2="ra"; | |
| 30 $t3="t12"; | |
| 31 $K="AT"; # 28 | |
| 32 | |
# BODY_00_19($i, $a, $b, $c, $d, $e)
#
# Appends to the global $code the assembly for one SHA-1 round in the range
# driven by the first generator loop (called with $i = 0..19).  $a..$e are the
# register names currently playing the five working-variable roles; the
# caller rotates them between calls.  Nothing is returned.
#
# Every variant performs the same round arithmetic:
#   e += K + X[i] + (a<<5) + (a>>27) + ((b & c) | (d & ~b))
#   b  = (b<<30) | ((b<<30)>>32)
# with "zapnot reg,0xf,reg" used to keep only the low four bytes of a
# register before its upper half could leak into a right shift.
#
# Which template is emitted depends on $i:
#   $i == 0            - issue the first two unaligned quadword loads
#                        (offsets 0 and 7 from $inp) into @X[0]/@X[1].
#   even $i, $i < 14   - issue the loads for the *next* pair of words into
#                        @X[$i+2]/@X[$i+3], ahead of their use.
#   even $i, $i < 15   - merge the two loaded quadwords (extql/extqh/or) into
#                        one 64-bit value holding two input words, byte-swap
#                        both words at once with shifts and zapnot masks,
#                        split the upper word off into @X[$i+1] (extll ..,4),
#                        and interleave the round arithmetic for word $i.
#   odd $i, $i < 15    - round arithmetic only; @X[$i] was prepared by the
#                        preceding even round.
#   $i >= 15           - round arithmetic plus the "forward Xupdate" for the
#                        next word, $j = $i+1 (all indices modulo 16):
#                        X[j] ^= X[j+2] ^ X[j+8] ^ X[j+13], followed by a
#                        one-bit left rotate built from srl 31 / addl / or.
#
# The instruction order inside each template is hand-scheduled (blank lines
# separate groups of instructions); do not reorder it casually.
| 33 sub BODY_00_19 { | |
| 34 my ($i,$a,$b,$c,$d,$e)=@_; | |
| 35 my $j=$i+1; | |
| 36 $code.=<<___ if ($i==0); | |
| 37 ldq_u @X[0],0+0($inp) | |
| 38 ldq_u @X[1],0+7($inp) | |
| 39 ___ | |
| 40 $code.=<<___ if (!($i&1) && $i<14); | |
| 41 ldq_u @X[$i+2],($i+2)*4+0($inp) | |
| 42 ldq_u @X[$i+3],($i+2)*4+7($inp) | |
| 43 ___ | |
| 44 $code.=<<___ if (!($i&1) && $i<15); | |
| 45 extql @X[$i],$inp,@X[$i] | |
| 46 extqh @X[$i+1],$inp,@X[$i+1] | |
| 47 | |
| 48 or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched | |
| 49 | |
| 50 srl @X[$i],24,$t0 # vectorized byte swap | |
| 51 srl @X[$i],8,$t2 | |
| 52 | |
| 53 sll @X[$i],8,$t3 | |
| 54 sll @X[$i],24,@X[$i] | |
| 55 zapnot $t0,0x11,$t0 | |
| 56 zapnot $t2,0x22,$t2 | |
| 57 | |
| 58 zapnot @X[$i],0x88,@X[$i] | |
| 59 or $t0,$t2,$t0 | |
| 60 zapnot $t3,0x44,$t3 | |
| 61 sll $a,5,$t1 | |
| 62 | |
| 63 or @X[$i],$t0,@X[$i] | |
| 64 addl $K,$e,$e | |
| 65 and $b,$c,$t2 | |
| 66 zapnot $a,0xf,$a | |
| 67 | |
| 68 or @X[$i],$t3,@X[$i] | |
| 69 srl $a,27,$t0 | |
| 70 bic $d,$b,$t3 | |
| 71 sll $b,30,$b | |
| 72 | |
| 73 extll @X[$i],4,@X[$i+1] # extract upper half | |
| 74 or $t2,$t3,$t2 | |
| 75 addl @X[$i],$e,$e | |
| 76 | |
| 77 addl $t1,$e,$e | |
| 78 srl $b,32,$t3 | |
| 79 zapnot @X[$i],0xf,@X[$i] | |
| 80 | |
| 81 addl $t0,$e,$e | |
| 82 addl $t2,$e,$e | |
| 83 or $t3,$b,$b | |
| 84 ___ | |
| 85 $code.=<<___ if (($i&1) && $i<15); | |
| 86 sll $a,5,$t1 | |
| 87 addl $K,$e,$e | |
| 88 and $b,$c,$t2 | |
| 89 zapnot $a,0xf,$a | |
| 90 | |
| 91 srl $a,27,$t0 | |
| 92 addl @X[$i%16],$e,$e | |
| 93 bic $d,$b,$t3 | |
| 94 sll $b,30,$b | |
| 95 | |
| 96 or $t2,$t3,$t2 | |
| 97 addl $t1,$e,$e | |
| 98 srl $b,32,$t3 | |
| 99 zapnot @X[$i],0xf,@X[$i] | |
| 100 | |
| 101 addl $t0,$e,$e | |
| 102 addl $t2,$e,$e | |
| 103 or $t3,$b,$b | |
| 104 ___ | |
| 105 $code.=<<___ if ($i>=15); # with forward Xupdate | |
| 106 sll $a,5,$t1 | |
| 107 addl $K,$e,$e | |
| 108 and $b,$c,$t2 | |
| 109 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] | |
| 110 | |
| 111 zapnot $a,0xf,$a | |
| 112 addl @X[$i%16],$e,$e | |
| 113 bic $d,$b,$t3 | |
| 114 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] | |
| 115 | |
| 116 srl $a,27,$t0 | |
| 117 addl $t1,$e,$e | |
| 118 or $t2,$t3,$t2 | |
| 119 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] | |
| 120 | |
| 121 sll $b,30,$b | |
| 122 addl $t0,$e,$e | |
| 123 srl @X[$j%16],31,$t1 | |
| 124 | |
| 125 addl $t2,$e,$e | |
| 126 srl $b,32,$t3 | |
| 127 addl @X[$j%16],@X[$j%16],@X[$j%16] | |
| 128 | |
| 129 or $t3,$b,$b | |
| 130 zapnot @X[$i%16],0xf,@X[$i%16] | |
| 131 or $t1,@X[$j%16],@X[$j%16] | |
| 132 ___ | |
| 133 } | |
| 134 | |
# BODY_20_39($i, $a, $b, $c, $d, $e)
#
# Appends to the global $code the assembly for one SHA-1 round that uses the
# parity function.  The generator loops call it for $i = 20..39 and again for
# $i = 60..79.  $a..$e are the register names currently holding the five
# working variables.  Nothing is returned.
#
# Round arithmetic emitted by both templates:
#   e += K + X[i] + (a<<5) + (a>>27) + (b ^ c ^ d)
#   b  = (b<<30) | (b>>2)
#
# Which template is emitted depends on $i:
#   $i < 79   - round arithmetic interleaved with the "forward Xupdate" for
#               word $j = $i+1 (indices modulo 16):
#               X[j] ^= X[j+2] ^ X[j+8] ^ X[j+13], then a one-bit left rotate
#               built from srl 31 / addl / or.
#   $i < 77   - additionally trims @X[$i%16] to its low four bytes (zapnot
#               0xf).  NOTE(review): presumably skipped for the last rounds
#               because those registers are never fed into another Xupdate -
#               confirm before changing the bound.
#   $i == 79  - final round: no schedule update is needed, so the free issue
#               slots are used to load the five state words from $ctx into
#               @X[0]..@X[4]; the driver adds them to A..E afterwards.
| 135 sub BODY_20_39 { | |
| 136 my ($i,$a,$b,$c,$d,$e)=@_; | |
| 137 my $j=$i+1; | |
| 138 $code.=<<___ if ($i<79); # with forward Xupdate | |
| 139 sll $a,5,$t1 | |
| 140 addl $K,$e,$e | |
| 141 zapnot $a,0xf,$a | |
| 142 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] | |
| 143 | |
| 144 sll $b,30,$t3 | |
| 145 addl $t1,$e,$e | |
| 146 xor $b,$c,$t2 | |
| 147 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] | |
| 148 | |
| 149 srl $b,2,$b | |
| 150 addl @X[$i%16],$e,$e | |
| 151 xor $d,$t2,$t2 | |
| 152 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] | |
| 153 | |
| 154 srl @X[$j%16],31,$t1 | |
| 155 addl $t2,$e,$e | |
| 156 srl $a,27,$t0 | |
| 157 addl @X[$j%16],@X[$j%16],@X[$j%16] | |
| 158 | |
| 159 or $t3,$b,$b | |
| 160 addl $t0,$e,$e | |
| 161 or $t1,@X[$j%16],@X[$j%16] | |
| 162 ___ | |
| 163 $code.=<<___ if ($i<77); | |
| 164 zapnot @X[$i%16],0xf,@X[$i%16] | |
| 165 ___ | |
| 166 $code.=<<___ if ($i==79); # with context fetch | |
| 167 sll $a,5,$t1 | |
| 168 addl $K,$e,$e | |
| 169 zapnot $a,0xf,$a | |
| 170 ldl @X[0],0($ctx) | |
| 171 | |
| 172 sll $b,30,$t3 | |
| 173 addl $t1,$e,$e | |
| 174 xor $b,$c,$t2 | |
| 175 ldl @X[1],4($ctx) | |
| 176 | |
| 177 srl $b,2,$b | |
| 178 addl @X[$i%16],$e,$e | |
| 179 xor $d,$t2,$t2 | |
| 180 ldl @X[2],8($ctx) | |
| 181 | |
| 182 srl $a,27,$t0 | |
| 183 addl $t2,$e,$e | |
| 184 ldl @X[3],12($ctx) | |
| 185 | |
| 186 or $t3,$b,$b | |
| 187 addl $t0,$e,$e | |
| 188 ldl @X[4],16($ctx) | |
| 189 ___ | |
| 190 } | |
| 191 | |
# BODY_40_59($i, $a, $b, $c, $d, $e)
#
# Appends to the global $code the assembly for one SHA-1 round that uses the
# majority function; the generator loop calls it for $i = 40..59.  $a..$e are
# the register names currently holding the five working variables.  Nothing
# is returned, and the single template is emitted unconditionally.
#
# Round arithmetic:
#   e += K + X[i] + (a<<5) + (a>>27) + ((b & c) | (b & d) | (c & d))
#   b  = (b<<30) | ((b<<30)>>32)
# interleaved with the "forward Xupdate" for word $j = $i+1 (indices modulo
# 16): X[j] ^= X[j+2] ^ X[j+8] ^ X[j+13], then a one-bit left rotate built
# from srl 31 / addl / or.  The closing zapnot trims the just-consumed
# @X[$i%16] to its low four bytes.
| 192 sub BODY_40_59 { | |
| 193 my ($i,$a,$b,$c,$d,$e)=@_; | |
| 194 my $j=$i+1; | |
| 195 $code.=<<___; # with forward Xupdate | |
| 196 sll $a,5,$t1 | |
| 197 addl $K,$e,$e | |
| 198 zapnot $a,0xf,$a | |
| 199 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] | |
| 200 | |
| 201 srl $a,27,$t0 | |
| 202 and $b,$c,$t2 | |
| 203 and $b,$d,$t3 | |
| 204 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] | |
| 205 | |
| 206 sll $b,30,$b | |
| 207 addl $t1,$e,$e | |
| 208 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] | |
| 209 | |
| 210 srl @X[$j%16],31,$t1 | |
| 211 addl $t0,$e,$e | |
| 212 or $t2,$t3,$t2 | |
| 213 and $c,$d,$t3 | |
| 214 | |
| 215 or $t2,$t3,$t2 | |
| 216 srl $b,32,$t3 | |
| 217 addl @X[$i%16],$e,$e | |
| 218 addl @X[$j%16],@X[$j%16],@X[$j%16] | |
| 219 | |
| 220 or $t3,$b,$b | |
| 221 addl $t2,$e,$e | |
| 222 or $t1,@X[$j%16],@X[$j%16] | |
| 223 zapnot @X[$i%16],0xf,@X[$i%16] | |
| 224 ___ | |
| 225 } | |
| 226 | |
# Build the complete assembly text for sha1_block_data_order in $code.
#
# Layout of the generated function:
#   1. Preamble: register-name headers (Linux vs. other), ".set noat" because
#      AT is used for the round constant, and the exported, 32-byte aligned
#      (".align 5") entry point.
#   2. Prologue: a 64-byte frame that saves ra, s0..s5 and fp.  ra must be
#      saved because it doubles as scratch register $t2.  NOTE(review): s0..s5
#      and fp are presumably saved because the numbered registers in @X
#      overlap them - confirm against the Alpha calling convention.
#   3. Setup: load the five state words from $ctx into A..E and turn the block
#      count in $num into an end pointer: $num = $inp + ($num << 6).
#   4. .Lloop: one iteration per 64-byte block.  Each group of 20 rounds is
#      preceded by an ldah/lda pair that builds its constant in $K from a
#      high part scaled by 65536 plus a signed low part, e.g.
#      23170*65536 + 31129 = 0x5A827999 for rounds 0..19.
#   5. The four "for" loops unroll all 80 rounds at generation time.  After
#      each round "unshift(@V,pop(@V))" moves the last register name to the
#      front of @V, so the a/b/c/d/e roles shift without any register moves
#      in the emitted code.  The loop variable $i deliberately carries over
#      from one loop to the next; rounds 60..79 reuse BODY_20_39.
#   6. Block epilogue: round 79 has loaded the previous state into
#      @X[0]..@X[4]; it is added to A..E, stored back to $ctx, $inp advances
#      by 64 and the loop repeats while $inp is below the end pointer
#      (unsigned compare).
#   7. Function epilogue: restore the saved registers, release the frame and
#      return, followed by an identification string.
| 227 $code=<<___; | |
| 228 #ifdef __linux__ | |
| 229 #include <asm/regdef.h> | |
| 230 #else | |
| 231 #include <asm.h> | |
| 232 #include <regdef.h> | |
| 233 #endif | |
| 234 | |
| 235 .text | |
| 236 | |
| 237 .set noat | |
| 238 .set noreorder | |
| 239 .globl sha1_block_data_order | |
| 240 .align 5 | |
| 241 .ent sha1_block_data_order | |
| 242 sha1_block_data_order: | |
| 243 lda sp,-64(sp) | |
| 244 stq ra,0(sp) | |
| 245 stq s0,8(sp) | |
| 246 stq s1,16(sp) | |
| 247 stq s2,24(sp) | |
| 248 stq s3,32(sp) | |
| 249 stq s4,40(sp) | |
| 250 stq s5,48(sp) | |
| 251 stq fp,56(sp) | |
| 252 .mask 0x0400fe00,-64 | |
| 253 .frame sp,64,ra | |
| 254 .prologue 0 | |
| 255 | |
| 256 ldl $A,0($ctx) | |
| 257 ldl $B,4($ctx) | |
| 258 sll $num,6,$num | |
| 259 ldl $C,8($ctx) | |
| 260 ldl $D,12($ctx) | |
| 261 ldl $E,16($ctx) | |
| 262 addq $inp,$num,$num | |
| 263 | |
| 264 .Lloop: | |
| 265 .set noreorder | |
| 266 ldah $K,23170(zero) | |
| 267 zapnot $B,0xf,$B | |
| 268 lda $K,31129($K) # K_00_19 | |
| 269 ___ | |
| 270 for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } | |
| 271 | |
| 272 $code.=<<___; | |
| 273 ldah $K,28378(zero) | |
| 274 lda $K,-5215($K) # K_20_39 | |
| 275 ___ | |
| 276 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | |
| 277 | |
| 278 $code.=<<___; | |
| 279 ldah $K,-28900(zero) | |
| 280 lda $K,-17188($K) # K_40_59 | |
| 281 ___ | |
| 282 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } | |
| 283 | |
| 284 $code.=<<___; | |
| 285 ldah $K,-13725(zero) | |
| 286 lda $K,-15914($K) # K_60_79 | |
| 287 ___ | |
| 288 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | |
| 289 | |
| 290 $code.=<<___; | |
| 291 addl @X[0],$A,$A | |
| 292 addl @X[1],$B,$B | |
| 293 addl @X[2],$C,$C | |
| 294 addl @X[3],$D,$D | |
| 295 addl @X[4],$E,$E | |
| 296 stl $A,0($ctx) | |
| 297 stl $B,4($ctx) | |
| 298 addq $inp,64,$inp | |
| 299 stl $C,8($ctx) | |
| 300 stl $D,12($ctx) | |
| 301 stl $E,16($ctx) | |
| 302 cmpult $inp,$num,$t1 | |
| 303 bne $t1,.Lloop | |
| 304 | |
| 305 .set noreorder | |
| 306 ldq ra,0(sp) | |
| 307 ldq s0,8(sp) | |
| 308 ldq s1,16(sp) | |
| 309 ldq s2,24(sp) | |
| 310 ldq s3,32(sp) | |
| 311 ldq s4,40(sp) | |
| 312 ldq s5,48(sp) | |
| 313 ldq fp,56(sp) | |
| 314 lda sp,64(sp) | |
| 315 ret (ra) | |
| 316 .end sha1_block_data_order | |
| 317 .ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>" | |
| 318 .align 2 | |
| 319 ___ | |
# Emit the generated assembly.
#
# An optional first command-line argument names the output file.  When it is
# absent (or false, exactly as before) the text goes to the inherited STDOUT.
$output = shift;
if ($output) {
    # Three-argument open: the file name is never parsed for mode characters.
    # A failure to create the file aborts the run instead of being ignored,
    # so a build cannot continue with a missing or stale assembly file.
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
print $code;
# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so the result of close has to be checked as well.
close(STDOUT) or die "error closing STDOUT: $!";
| OLD | NEW |