| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env perl | |
| 2 | |
| 3 # ==================================================================== | |
| 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 5 # project. The module is, however, dual licensed under OpenSSL and | |
| 6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
| 7 # details see http://www.openssl.org/~appro/cryptogams/. | |
| 8 # ==================================================================== | |
| 9 | |
| 10 # SHA256/512 block procedure for PA-RISC. | |
| 11 | |
| 12 # June 2009. | |
| 13 # | |
| 14 # SHA256 performance is >75% better than gcc 3.2 generated code on | |
| 15 # PA-7100LC. Compared to code generated by vendor compiler this | |
| 16 # implementation is almost 70% faster in 64-bit build, but delivers | |
| 17 # virtually same performance in 32-bit build on PA-8600. | |
| 18 # | |
| 19 # SHA512 performance is >2.9x better than gcc 3.2 generated code on | |
| 20 # PA-7100LC, PA-RISC 1.1 processor. Then the implementation detects if the | |
| 21 # code is executed on PA-RISC 2.0 processor and switches to 64-bit | |
| 22 # code path delivering adequate performance even in "blended" 32-bit | |
| 23 # build. Though 64-bit code is not any faster than code generated by | |
| 24 # vendor compiler on PA-8600... | |
| 25 # | |
| 26 # Special thanks to polarhome.com for providing HP-UX account. | |
| 27 | |
# Command line: <flavour> <output file>.  A flavour containing "64"
# selects the 64-bit ABI; an output name containing "512" selects the
# SHA-512 variant of the module.
$flavour = shift;
$output  = shift;

# Redirect STDOUT to the requested file.  The three-argument form keeps
# special characters in the file name from being read as an open mode,
# and a failure is reported instead of being silently ignored.  When no
# output name is given, nothing is reopened and the generated module goes
# to the inherited STDOUT.
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
| 31 | |
# ABI-dependent parameters.  A flavour containing "64" selects the wide
# PA-RISC 2.0 ABI, anything else the 32-bit PA-RISC 1.0 one.
#   $LEVEL         argument of the .LEVEL directive
#   $SIZE_T        size of a pointer / saved register in bytes
#   $FRAME_MARKER  size of the frame marker
#   $SAVED_RP      offset (below %sp) of the return-pointer save slot
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP) =
    ($flavour =~ /64/) ? ("2.0W", 8, 80, 16)
                       : ("1.0",  4, 48, 20);

# Pointer-sized store/load mnemonics, plain and with base modification.
($PUSH, $PUSHMA, $POP, $POPMB) =
    ($SIZE_T == 8) ? ("std", "std,ma", "ldd", "ldd,mb")
                   : ("stw", "stwm",   "ldw", "ldwm");
| 51 | |
# Algorithm-dependent parameters.  An output file name containing "512"
# selects SHA-512 (8-byte words), anything else SHA-256 (4-byte words).
$SZ = ($output =~ /512/) ? 8 : 4;

# Exported symbol, number of rounds, and the low ten bits of the last
# round constant (used to detect the end of the constant table).
($func, $rounds, $LAST10BITS) =
    ($SZ == 8) ? ("sha512_block_data_order", 80, 0x017)
               : ("sha256_block_data_order", 64, 0x0f2);

# Rotate/shift amounts of the four sigma functions.
@Sigma0 = ($SZ == 8) ? (28,34,39) : ( 2,13,22);
@Sigma1 = ($SZ == 8) ? (14,18,41) : ( 6,11,25);
@sigma0 = ($SZ == 8) ? ( 1, 8, 7) : ( 7,18, 3);
@sigma1 = ($SZ == 8) ? (19,61, 6) : (17,19,10);

# Word-sized load, load-and-modify-base and store mnemonics.
($LD, $LDM, $ST) =
    ($SZ == 8) ? ("ldd", "ldd,ma", "std")
               : ("ldw", "ldwm",   "stw");
| 77 | |
# Stack frame layout.  $XOFF starts out as the size of the local-variable
# area and is then rebased to the distance between %sp and that area.
$XOFF   = 16*$SZ + 32;                          # local variables
$FRAME  = 16*$SIZE_T + $FRAME_MARKER + $XOFF;   # 16 saved regs + frame marker
                                                # [+ argument transfer] + locals
$XOFF  += $FRAME_MARKER;                        # distance between %sp and local variables

# Incoming arguments; their registers are reused as scratch later on.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24");  # zapped by $a0, $a1, $t0

# Scratch registers and the pointer into the round-constant table.
($a0, $a1, $t0, $t1, $Tbl) = ("%r26", "%r25", "%r24", "%r29", "%r31");

# Working variables a..h, rotated by the callers after every round.
@V = ($A,$B,$C,$D,$E,$F,$G,$H) = (map("%r$_", 17..23), "%r28");

# Message schedule X[0..15]; the extra 17th entry is $inp, which receives
# the additional word loaded when the input is not aligned.
@X = (map("%r$_", 1..16), $inp);
| 98 | |
# Append one SHA round (register-wide code path) to $code.
#
#   $i       round number; X[$i%16] is the message word consumed
#   $a..$h   registers currently holding the eight working variables
#
# The emitted code expects $t1 to hold the round constant on entry (it is
# added to h first) and, for $i<15, loads the next constant from $Tbl
# into $t1 before finishing.  The caller rotates the working-variable
# list between rounds.
sub ROUND_00_15 {
    my $i = shift;
    my ($a,$b,$c,$d,$e,$f,$g,$h) = @_;

    $code .= <<___;
	_ror $e,$Sigma1[0],$a0
	and $f,$e,$t0
	_ror $e,$Sigma1[1],$a1
	addl $t1,$h,$h
	andcm $g,$e,$t1
	xor $a1,$a0,$a0
	_ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or $t0,$t1,$t1 ; Ch(e,f,g)
	addl @X[$i%16],$h,$h
	xor $a0,$a1,$a1 ; Sigma1(e)
	addl $t1,$h,$h
	_ror $a,$Sigma0[0],$a0
	addl $a1,$h,$h

	_ror $a,$Sigma0[1],$a1
	and $a,$b,$t0
	and $a,$c,$t1
	xor $a1,$a0,$a0
	_ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor $t1,$t0,$t0
	and $b,$c,$t1
	xor $a0,$a1,$a1 ; Sigma0(a)
	addl $h,$d,$d
	xor $t1,$t0,$t0 ; Maj(a,b,c)
	`"$LDM $SZ($Tbl),$t1" if ($i<15)`
	addl $a1,$h,$h
	addl $t0,$h,$h

___
}
| 132 | |
# Append one round with message-schedule expansion to $code.
#
#   first argument     round number (16 or above)
#   remaining args     registers holding the working variables a..h
#
# With i = round-16, X[i] is updated in place with
# X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14]) and the next round constant
# is loaded into $t1.  For i==15 the low ten bits of that constant are
# compared with $LAST10BITS and $Tbl is bumped by one to signal the end
# of the table.  The round itself is emitted by ROUND_00_15.
sub ROUND_16_xx {
    my ($round, @state) = @_;
    my $i = $round - 16;

    $code .= <<___;
	_ror @X[($i+1)%16],$sigma0[0],$a0
	_ror @X[($i+1)%16],$sigma0[1],$a1
	addl @X[($i+9)%16],@X[$i],@X[$i]
	_ror @X[($i+14)%16],$sigma1[0],$t0
	_ror @X[($i+14)%16],$sigma1[1],$t1
	xor $a1,$a0,$a0
	_shr @X[($i+1)%16],$sigma0[2],$a1
	xor $t1,$t0,$t0
	_shr @X[($i+14)%16],$sigma1[2],$t1
	xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
	xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
	$LDM $SZ($Tbl),$t1
	addl $a0,@X[$i],@X[$i]
	addl $t0,@X[$i],@X[$i]
___

    if ($i == 15) {
        $code .= <<___;
	extru $t1,31,10,$a1
	comiclr,<> $LAST10BITS,$a1,%r0
	ldo 1($Tbl),$Tbl ; signal end of $Tbl
___
    }

    ROUND_00_15($round, @state);
}
| 159 | |
| 160 $code=<<___; | |
| 161 .LEVEL $LEVEL | |
| 162 .SPACE \$TEXT\$ | |
| 163 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY | |
| 164 | |
| 165 .ALIGN 64 | |
| 166 L\$table | |
| 167 ___ | |
| 168 $code.=<<___ if ($SZ==8); | |
| 169 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd | |
| 170 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc | |
| 171 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 | |
| 172 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 | |
| 173 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe | |
| 174 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 | |
| 175 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 | |
| 176 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 | |
| 177 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 | |
| 178 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 | |
| 179 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 | |
| 180 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 | |
| 181 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 | |
| 182 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 | |
| 183 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 | |
| 184 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 | |
| 185 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 | |
| 186 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df | |
| 187 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 | |
| 188 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b | |
| 189 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 | |
| 190 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 | |
| 191 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 | |
| 192 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 | |
| 193 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 | |
| 194 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 | |
| 195 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb | |
| 196 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 | |
| 197 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 | |
| 198 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec | |
| 199 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 | |
| 200 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b | |
| 201 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 | |
| 202 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 | |
| 203 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 | |
| 204 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b | |
| 205 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 | |
| 206 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c | |
| 207 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a | |
| 208 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 | |
| 209 ___ | |
| 210 $code.=<<___ if ($SZ==4); | |
| 211 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | |
| 212 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | |
| 213 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | |
| 214 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | |
| 215 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | |
| 216 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | |
| 217 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | |
| 218 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | |
| 219 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | |
| 220 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | |
| 221 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | |
| 222 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | |
| 223 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | |
| 224 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | |
| 225 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | |
| 226 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | |
| 227 ___ | |
| 228 $code.=<<___; | |
| 229 | |
| 230 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR | |
| 231 .ALIGN 64 | |
| 232 $func | |
| 233 .PROC | |
| 234 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 | |
| 235 .ENTRY | |
| 236 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue | |
| 237 $PUSHMA %r3,$FRAME(%sp) | |
| 238 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) | |
| 239 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) | |
| 240 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) | |
| 241 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) | |
| 242 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) | |
| 243 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) | |
| 244 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) | |
| 245 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) | |
| 246 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) | |
| 247 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) | |
| 248 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) | |
| 249 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) | |
| 250 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) | |
| 251 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) | |
| 252 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) | |
| 253 | |
| 254 _shl $num,`log(16*$SZ)/log(2)`,$num | |
| 255 addl $inp,$num,$num ; $num to point at the end of $inp | |
| 256 | |
| 257 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments | |
| 258 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) | |
| 259 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp) | |
| 260 | |
| 261 blr %r0,$Tbl | |
| 262 ldi 3,$t1 | |
| 263 L\$pic | |
| 264 andcm $Tbl,$t1,$Tbl ; wipe privilege level | |
| 265 ldo L\$table-L\$pic($Tbl),$Tbl | |
| 266 ___ | |
| 267 $code.=<<___ if ($SZ==8 && $SIZE_T==4); | |
| 268 ldi 31,$t1 | |
| 269 mtctl $t1,%cr11 | |
| 270 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0 | |
| 271 b L\$parisc1 | |
| 272 nop | |
| 273 ___ | |
| 274 $code.=<<___; | |
| 275 $LD `0*$SZ`($ctx),$A ; load context | |
| 276 $LD `1*$SZ`($ctx),$B | |
| 277 $LD `2*$SZ`($ctx),$C | |
| 278 $LD `3*$SZ`($ctx),$D | |
| 279 $LD `4*$SZ`($ctx),$E | |
| 280 $LD `5*$SZ`($ctx),$F | |
| 281 $LD `6*$SZ`($ctx),$G | |
| 282 $LD `7*$SZ`($ctx),$H | |
| 283 | |
| 284 extru $inp,31,`log($SZ)/log(2)`,$t0 | |
| 285 sh3addl $t0,%r0,$t0 | |
| 286 subi `8*$SZ`,$t0,$t0 | |
| 287 mtctl $t0,%cr11 ; load %sar with align factor | |
| 288 | |
| 289 L\$oop | |
| 290 ldi `$SZ-1`,$t0 | |
| 291 $LDM $SZ($Tbl),$t1 | |
| 292 andcm $inp,$t0,$t0 ; align $inp | |
| 293 ___ | |
| 294 for ($i=0;$i<15;$i++) { # load input block | |
| 295 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; } | |
| 296 $code.=<<___; | |
| 297 cmpb,*= $inp,$t0,L\$aligned | |
| 298 $LD `$SZ*15`($t0),@X[15] | |
| 299 $LD `$SZ*16`($t0),@X[16] | |
| 300 ___ | |
| 301 for ($i=0;$i<16;$i++) { # align data | |
| 302 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; } | |
| 303 $code.=<<___; | |
| 304 L\$aligned | |
| 305 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD | |
| 306 ___ | |
| 307 | |
| 308 for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } | |
| 309 $code.=<<___; | |
| 310 L\$rounds | |
| 311 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD | |
| 312 ___ | |
| 313 for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } | |
| 314 $code.=<<___; | |
| 315 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled? | |
| 316 nop | |
| 317 | |
| 318 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments | |
| 319 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp | |
| 320 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num | |
| 321 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl | |
| 322 | |
| 323 $LD `0*$SZ`($ctx),@X[0] ; load context | |
| 324 $LD `1*$SZ`($ctx),@X[1] | |
| 325 $LD `2*$SZ`($ctx),@X[2] | |
| 326 $LD `3*$SZ`($ctx),@X[3] | |
| 327 $LD `4*$SZ`($ctx),@X[4] | |
| 328 $LD `5*$SZ`($ctx),@X[5] | |
| 329 addl @X[0],$A,$A | |
| 330 $LD `6*$SZ`($ctx),@X[6] | |
| 331 addl @X[1],$B,$B | |
| 332 $LD `7*$SZ`($ctx),@X[7] | |
| 333 ldo `16*$SZ`($inp),$inp ; advance $inp | |
| 334 | |
| 335 $ST $A,`0*$SZ`($ctx) ; save context | |
| 336 addl @X[2],$C,$C | |
| 337 $ST $B,`1*$SZ`($ctx) | |
| 338 addl @X[3],$D,$D | |
| 339 $ST $C,`2*$SZ`($ctx) | |
| 340 addl @X[4],$E,$E | |
| 341 $ST $D,`3*$SZ`($ctx) | |
| 342 addl @X[5],$F,$F | |
| 343 $ST $E,`4*$SZ`($ctx) | |
| 344 addl @X[6],$G,$G | |
| 345 $ST $F,`5*$SZ`($ctx) | |
| 346 addl @X[7],$H,$H | |
| 347 $ST $G,`6*$SZ`($ctx) | |
| 348 $ST $H,`7*$SZ`($ctx) | |
| 349 | |
| 350 cmpb,*<>,n $inp,$num,L\$oop | |
| 351 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp | |
| 352 ___ | |
| 353 if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0 | |
| 354 {{ | |
| 355 $code.=<<___; | |
| 356 b L\$done | |
| 357 nop | |
| 358 | |
| 359 .ALIGN 64 | |
| 360 L\$parisc1 | |
| 361 ___ | |
| 362 | |
| 363 @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo, | |
| 364 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) = | |
| 365 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", | |
| 366 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16"); | |
| 367 $a0 ="%r17"; | |
| 368 $a1 ="%r18"; | |
| 369 $a2 ="%r19"; | |
| 370 $a3 ="%r20"; | |
| 371 $t0 ="%r21"; | |
| 372 $t1 ="%r22"; | |
| 373 $t2 ="%r28"; | |
| 374 $t3 ="%r29"; | |
| 375 $Tbl="%r31"; | |
| 376 | |
| 377 @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx | |
| 378 | |
# Append one SHA-512 round for the 32-bit PA-RISC 1.x code path to $code.
# Every 64-bit quantity lives in a hi/lo register pair.
#
#   $i      round number
#   next 16 hi/lo register pairs holding the working variables a..h
#   $flag   true when called from ROUND_16_xx_pa1; the preload of X[i+1]
#           is then skipped, and for $i==15 a branch back to
#           L$rounds_pa1 is emitted, skipped once the low ten bits of
#           the loaded constant equal $LAST10BITS
#
# The first two entries of @X hold the current message word; they are
# overwritten with K[i] inside the round.  @X is rotated by two entries
# on exit so that the X[i+1] registers become the current pair.
sub ROUND_00_15_pa1 {
    my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
        $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag) = @_;
    my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;

    unless ($flag) {
        $code .= <<___;
	ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
___
    }

    $code .= <<___;
	shd $ehi,$elo,$Sigma1[0],$t0
	add $Xlo,$hlo,$hlo
	shd $elo,$ehi,$Sigma1[0],$t1
	addc $Xhi,$hhi,$hhi ; h += X[i]
	shd $ehi,$elo,$Sigma1[1],$t2
	ldwm 8($Tbl),$Xhi
	shd $elo,$ehi,$Sigma1[1],$t3
	ldw -4($Tbl),$Xlo ; load K[i]
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1
	and $flo,$elo,$a0
	and $fhi,$ehi,$a1
	shd $ehi,$elo,$Sigma1[2],$t2
	andcm $glo,$elo,$a2
	shd $elo,$ehi,$Sigma1[2],$t3
	andcm $ghi,$ehi,$a3
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1 ; Sigma1(e)
	add $Xlo,$hlo,$hlo
	xor $a2,$a0,$a0
	addc $Xhi,$hhi,$hhi ; h += K[i]
	xor $a3,$a1,$a1 ; Ch(e,f,g)

	add $t0,$hlo,$hlo
	shd $ahi,$alo,$Sigma0[0],$t0
	addc $t1,$hhi,$hhi ; h += Sigma1(e)
	shd $alo,$ahi,$Sigma0[0],$t1
	add $a0,$hlo,$hlo
	shd $ahi,$alo,$Sigma0[1],$t2
	addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
	shd $alo,$ahi,$Sigma0[1],$t3

	xor $t2,$t0,$t0
	xor $t3,$t1,$t1
	shd $ahi,$alo,$Sigma0[2],$t2
	and $alo,$blo,$a0
	shd $alo,$ahi,$Sigma0[2],$t3
	and $ahi,$bhi,$a1
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1 ; Sigma0(a)

	and $alo,$clo,$a2
	and $ahi,$chi,$a3
	xor $a2,$a0,$a0
	add $hlo,$dlo,$dlo
	xor $a3,$a1,$a1
	addc $hhi,$dhi,$dhi ; d += h
	and $blo,$clo,$a2
	add $t0,$hlo,$hlo
	and $bhi,$chi,$a3
	addc $t1,$hhi,$hhi ; h += Sigma0(a)
	xor $a2,$a0,$a0
	add $a0,$hlo,$hlo
	xor $a3,$a1,$a1 ; Maj(a,b,c)
	addc $a1,$hhi,$hhi ; h += Maj(a,b,c)

___

    if ($i == 15 && $flag) {
        $code .= <<___;
	extru $Xlo,31,10,$Xlo
	comiclr,= $LAST10BITS,$Xlo,%r0
	b L\$rounds_pa1
	nop
___
    }

    # Rotate @X by one hi/lo pair.
    push @X, shift @X;
    push @X, shift @X;
}
| 454 | |
# Append one SHA-512 round with message-schedule expansion for the 32-bit
# PA-RISC 1.x code path to $code.
#
#   first argument   round number (16 or above)
#   remaining args   hi/lo register pairs of the working variables,
#                    passed on to ROUND_00_15_pa1
#
# With i = round-16, the emitted code loads X[i+1], X[i+9] and X[i+14]
# from the stack, adds X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14]) to the
# current word held in the first two @X registers, stores the result back
# to the X[i] slot, and then emits the round via ROUND_00_15_pa1 with its
# flag set.
#
# The two comments emitted next to the sigma computations used to read
# "sigma0(X[i+1)&0x0f])" and "sigma0(X[i+14)&0x0f])"; they now carry
# balanced brackets and name sigma1 for the X[i+14] term, matching
# ROUND_16_xx.  Only assembler comments change, not instructions.
sub ROUND_16_xx_pa1 {
    my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
    my ($i) = shift;
    $i -= 16;

    $code .= <<___;
	ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
	ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
	ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
	ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
	ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
	shd $Xnhi,$Xnlo,$sigma0[0],$t0
	shd $Xnlo,$Xnhi,$sigma0[0],$t1
	add $a0,$Xlo,$Xlo
	shd $Xnhi,$Xnlo,$sigma0[1],$t2
	addc $a1,$Xhi,$Xhi
	shd $Xnlo,$Xnhi,$sigma0[1],$t3
	xor $t2,$t0,$t0
	shd $Xnhi,$Xnlo,$sigma0[2],$t2
	xor $t3,$t1,$t1
	extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
	xor $t2,$t0,$t0
	shd $a3,$a2,$sigma1[0],$a0
	xor $t3,$t1,$t1 ; sigma0(X[(i+1)&0x0f])
	shd $a2,$a3,$sigma1[0],$a1
	add $t0,$Xlo,$Xlo
	shd $a3,$a2,$sigma1[1],$t2
	addc $t1,$Xhi,$Xhi
	shd $a2,$a3,$sigma1[1],$t3
	xor $t2,$a0,$a0
	shd $a3,$a2,$sigma1[2],$t2
	xor $t3,$a1,$a1
	extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
	xor $t2,$a0,$a0
	xor $t3,$a1,$a1 ; sigma1(X[(i+14)&0x0f])
	add $a0,$Xlo,$Xlo
	addc $a1,$Xhi,$Xhi

	stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
	stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
___
    &ROUND_00_15_pa1($i,@_,1);
}
| 498 $code.=<<___; | |
| 499 ldw `0*4`($ctx),$Ahi ; load context | |
| 500 ldw `1*4`($ctx),$Alo | |
| 501 ldw `2*4`($ctx),$Bhi | |
| 502 ldw `3*4`($ctx),$Blo | |
| 503 ldw `4*4`($ctx),$Chi | |
| 504 ldw `5*4`($ctx),$Clo | |
| 505 ldw `6*4`($ctx),$Dhi | |
| 506 ldw `7*4`($ctx),$Dlo | |
| 507 ldw `8*4`($ctx),$Ehi | |
| 508 ldw `9*4`($ctx),$Elo | |
| 509 ldw `10*4`($ctx),$Fhi | |
| 510 ldw `11*4`($ctx),$Flo | |
| 511 ldw `12*4`($ctx),$Ghi | |
| 512 ldw `13*4`($ctx),$Glo | |
| 513 ldw `14*4`($ctx),$Hhi | |
| 514 ldw `15*4`($ctx),$Hlo | |
| 515 | |
| 516 extru $inp,31,2,$t0 | |
| 517 sh3addl $t0,%r0,$t0 | |
| 518 subi 32,$t0,$t0 | |
| 519 mtctl $t0,%cr11 ; load %sar with align factor | |
| 520 | |
| 521 L\$oop_pa1 | |
| 522 extru $inp,31,2,$a3 | |
| 523 comib,= 0,$a3,L\$aligned_pa1 | |
| 524 sub $inp,$a3,$inp | |
| 525 | |
| 526 ldw `0*4`($inp),$X[0] | |
| 527 ldw `1*4`($inp),$X[1] | |
| 528 ldw `2*4`($inp),$t2 | |
| 529 ldw `3*4`($inp),$t3 | |
| 530 ldw `4*4`($inp),$a0 | |
| 531 ldw `5*4`($inp),$a1 | |
| 532 ldw `6*4`($inp),$a2 | |
| 533 ldw `7*4`($inp),$a3 | |
| 534 vshd $X[0],$X[1],$X[0] | |
| 535 vshd $X[1],$t2,$X[1] | |
| 536 stw $X[0],`-$XOFF+0*4`(%sp) | |
| 537 ldw `8*4`($inp),$t0 | |
| 538 vshd $t2,$t3,$t2 | |
| 539 stw $X[1],`-$XOFF+1*4`(%sp) | |
| 540 ldw `9*4`($inp),$t1 | |
| 541 vshd $t3,$a0,$t3 | |
| 542 ___ | |
| 543 { | |
| 544 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); | |
| 545 for ($i=2;$i<=(128/4-8);$i++) { | |
| 546 $code.=<<___; | |
| 547 stw $t[0],`-$XOFF+$i*4`(%sp) | |
| 548 ldw `(8+$i)*4`($inp),$t[0] | |
| 549 vshd $t[1],$t[2],$t[1] | |
| 550 ___ | |
| 551 push(@t,shift(@t)); | |
| 552 } | |
| 553 for (;$i<(128/4-1);$i++) { | |
| 554 $code.=<<___; | |
| 555 stw $t[0],`-$XOFF+$i*4`(%sp) | |
| 556 vshd $t[1],$t[2],$t[1] | |
| 557 ___ | |
| 558 push(@t,shift(@t)); | |
| 559 } | |
| 560 $code.=<<___; | |
| 561 b L\$collected_pa1 | |
| 562 stw $t[0],`-$XOFF+$i*4`(%sp) | |
| 563 | |
| 564 ___ | |
| 565 } | |
| 566 $code.=<<___; | |
| 567 L\$aligned_pa1 | |
| 568 ldw `0*4`($inp),$X[0] | |
| 569 ldw `1*4`($inp),$X[1] | |
| 570 ldw `2*4`($inp),$t2 | |
| 571 ldw `3*4`($inp),$t3 | |
| 572 ldw `4*4`($inp),$a0 | |
| 573 ldw `5*4`($inp),$a1 | |
| 574 ldw `6*4`($inp),$a2 | |
| 575 ldw `7*4`($inp),$a3 | |
| 576 stw $X[0],`-$XOFF+0*4`(%sp) | |
| 577 ldw `8*4`($inp),$t0 | |
| 578 stw $X[1],`-$XOFF+1*4`(%sp) | |
| 579 ldw `9*4`($inp),$t1 | |
| 580 ___ | |
| 581 { | |
| 582 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); | |
| 583 for ($i=2;$i<(128/4-8);$i++) { | |
| 584 $code.=<<___; | |
| 585 stw $t[0],`-$XOFF+$i*4`(%sp) | |
| 586 ldw `(8+$i)*4`($inp),$t[0] | |
| 587 ___ | |
| 588 push(@t,shift(@t)); | |
| 589 } | |
| 590 for (;$i<128/4;$i++) { | |
| 591 $code.=<<___; | |
| 592 stw $t[0],`-$XOFF+$i*4`(%sp) | |
| 593 ___ | |
| 594 push(@t,shift(@t)); | |
| 595 } | |
| 596 $code.="L\$collected_pa1\n"; | |
| 597 } | |
| 598 | |
| 599 for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(
@V,pop(@V)); } | |
| 600 $code.="L\$rounds_pa1\n"; | |
| 601 for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(
@V,pop(@V)); } | |
| 602 | |
| 603 $code.=<<___; | |
| 604 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments | |
| 605 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp | |
| 606 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num | |
| 607 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl | |
| 608 | |
| 609 ldw `0*4`($ctx),$t1 ; update context | |
| 610 ldw `1*4`($ctx),$t0 | |
| 611 ldw `2*4`($ctx),$t3 | |
| 612 ldw `3*4`($ctx),$t2 | |
| 613 ldw `4*4`($ctx),$a1 | |
| 614 ldw `5*4`($ctx),$a0 | |
| 615 ldw `6*4`($ctx),$a3 | |
| 616 add $t0,$Alo,$Alo | |
| 617 ldw `7*4`($ctx),$a2 | |
| 618 addc $t1,$Ahi,$Ahi | |
| 619 ldw `8*4`($ctx),$t1 | |
| 620 add $t2,$Blo,$Blo | |
| 621 ldw `9*4`($ctx),$t0 | |
| 622 addc $t3,$Bhi,$Bhi | |
| 623 ldw `10*4`($ctx),$t3 | |
| 624 add $a0,$Clo,$Clo | |
| 625 ldw `11*4`($ctx),$t2 | |
| 626 addc $a1,$Chi,$Chi | |
| 627 ldw `12*4`($ctx),$a1 | |
| 628 add $a2,$Dlo,$Dlo | |
| 629 ldw `13*4`($ctx),$a0 | |
| 630 addc $a3,$Dhi,$Dhi | |
| 631 ldw `14*4`($ctx),$a3 | |
| 632 add $t0,$Elo,$Elo | |
| 633 ldw `15*4`($ctx),$a2 | |
| 634 addc $t1,$Ehi,$Ehi | |
| 635 stw $Ahi,`0*4`($ctx) | |
| 636 add $t2,$Flo,$Flo | |
| 637 stw $Alo,`1*4`($ctx) | |
| 638 addc $t3,$Fhi,$Fhi | |
| 639 stw $Bhi,`2*4`($ctx) | |
| 640 add $a0,$Glo,$Glo | |
| 641 stw $Blo,`3*4`($ctx) | |
| 642 addc $a1,$Ghi,$Ghi | |
| 643 stw $Chi,`4*4`($ctx) | |
| 644 add $a2,$Hlo,$Hlo | |
| 645 stw $Clo,`5*4`($ctx) | |
| 646 addc $a3,$Hhi,$Hhi | |
| 647 stw $Dhi,`6*4`($ctx) | |
| 648 ldo `16*$SZ`($inp),$inp ; advance $inp | |
| 649 stw $Dlo,`7*4`($ctx) | |
| 650 stw $Ehi,`8*4`($ctx) | |
| 651 stw $Elo,`9*4`($ctx) | |
| 652 stw $Fhi,`10*4`($ctx) | |
| 653 stw $Flo,`11*4`($ctx) | |
| 654 stw $Ghi,`12*4`($ctx) | |
| 655 stw $Glo,`13*4`($ctx) | |
| 656 stw $Hhi,`14*4`($ctx) | |
| 657 comb,= $inp,$num,L\$done | |
| 658 stw $Hlo,`15*4`($ctx) | |
| 659 b L\$oop_pa1 | |
| 660 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp | |
| 661 L\$done | |
| 662 ___ | |
| 663 }} | |
| 664 $code.=<<___; | |
| 665 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue | |
| 666 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 | |
| 667 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 | |
| 668 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 | |
| 669 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 | |
| 670 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 | |
| 671 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 | |
| 672 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 | |
| 673 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 | |
| 674 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 | |
| 675 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 | |
| 676 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 | |
| 677 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 | |
| 678 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 | |
| 679 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 | |
| 680 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 | |
| 681 bv (%r2) | |
| 682 .EXIT | |
| 683 $POPMB -$FRAME(%sp),%r3 | |
| 684 .PROCEND | |
| 685 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\
@openssl.org>" | |
| 686 ___ | |
| 687 | |
| 688 # Explicitly encode PA-RISC 2.0 instructions used in this module, so | |
| 689 # that it can be compiled with .LEVEL 1.0. It should be noted that I | |
| 690 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 | |
| 691 # directive... | |
| 692 | |
# Encode "ldd<mod> disp(%rB),%rT" as a raw .WORD so that the module can
# be assembled with .LEVEL 1.0.
#
#   $mod   completer string, e.g. "" or ",ma" or ",mb"
#   $args  operand string
#
# Returns the .WORD line, with the original instruction kept as a
# trailing comment, or the instruction unchanged (tab-prefixed) when the
# operands do not have the displacement(base),target shape.
my $ldd = sub {
    my ($mod, $args) = @_;
    my $orig = "ldd$mod\t$args";

    # format 3 suffices
    my ($disp, $base, $target) =
        ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/);
    return "\t" . $orig unless defined($target);

    my $opcode = (0x14<<26) | ($base<<21) | ($target<<16)
               | (($disp&0x1FF8)<<1) | (($disp>>13)&1);
    $opcode |= (1<<3) if ($mod =~ /^,m/);     # any base-modifying completer
    $opcode |= (1<<2) if ($mod =~ /^,mb/);    # additionally set for ",mb"

    return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
};
| 705 | |
# Encode "std<mod> %rS,disp(%rB)" as a raw .WORD so that the module can
# be assembled with .LEVEL 1.0.
#
#   $mod   completer string; it is kept in the trailing comment only and
#          does not influence the encoding
#   $args  operand string
#
# Returns the .WORD line, with the original instruction kept as a
# trailing comment, or the instruction unchanged (tab-prefixed) when the
# operands do not have the source,displacement(base) shape.
my $std = sub {
    my ($mod, $args) = @_;
    my $orig = "std$mod\t$args";

    # format 3 suffices
    my ($source, $disp, $base) =
        ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/);
    return "\t" . $orig unless defined($base);

    my $opcode = (0x1c<<26) | ($base<<21) | ($source<<16)
               | (($disp&0x1FF8)<<1) | (($disp>>13)&1);

    return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
};
| 716 | |
# Encode "extrd<mod>" as a raw .WORD so that the module can be assembled
# with .LEVEL 1.0.  Two operand shapes are recognised:
#
#   %rS,pos,len,%rT    fixed position         (format 15)
#   %rS,%sar,len,%rT   position taken from %sar (format 12)
#
#   $mod   completer string
#   $args  operand string
#
# Returns the .WORD line, with the original instruction kept as a
# trailing comment, or the instruction unchanged (tab-prefixed) when
# neither shape matches.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $orig = "extrd$mod\t$args";
    my $opcode;

    # I only have ",u" completer, it's implicitly encoded...
    if (my ($source, $pos, $width, $target) =
            ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)) {     # format 15
        my $len = 32 - $width;
        $opcode  = (0x36<<26) | ($source<<21) | ($target<<16);
        $opcode |= (($pos&0x20)<<6) | (($pos&0x1f)<<5);     # encode pos
        $opcode |= (($len&0x20)<<7) | ($len&0x1f);          # encode len
    }
    elsif (my ($reg, $bits, $dest) =
            ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)) {         # format 12
        my $len = 32 - $bits;
        $opcode  = (0x34<<26) | ($reg<<21) | ($dest<<16) | (2<<11) | (1<<9);
        $opcode |= (($len&0x20)<<3) | ($len&0x1f);          # encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);            # "=" condition completer
    }
    else {
        return "\t" . $orig;
    }

    return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
};
| 738 | |
# Encode "shrpd<mod>" as a raw .WORD so that the module can be assembled
# with .LEVEL 1.0.  Two operand shapes are recognised:
#
#   %rA,%rB,sa,%rT     fixed shift amount          (format 14)
#   %rA,%rB,%sar,%rT   shift amount taken from %sar (format 11)
#
#   $mod   completer string; kept in the trailing comment only
#   $args  operand string
#
# Returns the .WORD line, with the original instruction kept as a
# trailing comment, or the instruction unchanged (tab-prefixed) when
# neither shape matches.
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $orig = "shrpd$mod\t$args";
    my $opcode;

    if (my ($first, $second, $shift, $target) =
            ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)) {   # format 14
        my $cpos = 63 - $shift;
        $opcode  = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<10) | $target;
        $opcode |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);   # encode sa
    }
    elsif (my ($left, $right, $dest) =
            ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)) {       # format 11
        $opcode = (0x34<<26) | ($right<<21) | ($left<<16) | (1<<9) | $dest;
    }
    else {
        return "\t" . $orig;
    }

    return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
};
| 755 | |
# Return the output line for one instruction.
#
#   $mnemonic  instruction name, e.g. "ldd"
#   $mod       completer string (may be empty)
#   $args      operand string
#
# A variable named after the mnemonic is looked up by name; when it holds
# a code reference (such as the $ldd, $std, $extrd and $shrpd encoders)
# that encoder produces the line.  Any other instruction is returned as
# "\t<mnemonic><mod>\t<args>".
sub assemble {
    my ($mnemonic, $mod, $args) = @_;

    # The string eval is deliberate: the encoders are variables that can
    # only be reached by name.
    my $encoder = eval("\$$mnemonic");

    return $encoder->($mod, $args) if (ref($encoder) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}
| 762 | |
# Post-process the collected template line by line and print the final
# assembler source:
#   1. evaluate the `...` arithmetic embedded in the templates;
#   2. normalise "shd" with a rotation of 32 or more, or else expand one
#      of the made-up instructions (at most one rewrite per line);
#   3. for 32-bit builds, hand-encode PA-RISC 2.0 instructions via
#      assemble() and turn "cmpb,*" into "comb,".
for my $line (split("\n", $code)) {
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    # The first substitution that succeeds ends the chain.
    $line =~ s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
                $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)    # rotation for >=32
                      : sprintf("shd\t%$1,%$2,%d",$3)/e
    or
    # translate made up instructions: _ror, _shr, _align, _shl
    $line =~ s/_ror(\s+)(%r[0-9]+),/
                ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e
    or
    $line =~ s/_shr(\s+%r[0-9]+),([0-9]+),/
                $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
                       : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e
    or
    $line =~ s/_align(\s+%r[0-9]+,%r[0-9]+),/
                ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e
    or
    $line =~ s/_shl(\s+%r[0-9]+),([0-9]+),/
                $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
                           : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

    if ($SIZE_T==4) {
        # Only indented lines (instructions, not labels) are encoded.
        $line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        $line =~ s/cmpb,\*/comb,/;
    }

    print $line, "\n";
}

# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so the result of close must not be ignored.
close(STDOUT) or die "error closing STDOUT: $!";
| OLD | NEW |