| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env perl | |
| 2 | |
| 3 # ==================================================================== | |
| 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 5 # project. The module is, however, dual licensed under OpenSSL and | |
| 6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
| 7 # details see http://www.openssl.org/~appro/cryptogams/. | |
| 8 # ==================================================================== | |
| 9 | |
| 10 # I let hardware handle unaligned input(*), except on page boundaries | |
| 11 # (see below for details). Otherwise straightforward implementation | |
| 12 # with X vector in register bank. The module is big-endian [which is | |
| 13 # not big deal as there're no little-endian targets left around]. | |
| 14 # | |
| 15 # (*) this means that this module is inappropriate for PPC403? Does | |
| 16 # anybody know if pre-POWER3 can sustain unaligned load? | |
| 17 | |
| 18 # -m64 -m32 | |
| 19 # ---------------------------------- | |
| 20 # PPC970,gcc-4.0.0 +76% +59% | |
| 21 # Power6,xlc-7 +68% +33% | |
| 22 | |
# Target flavour is the first command-line argument.  A flavour containing
# "64" selects 8-byte stack slots and the doubleword mnemonics, one
# containing "32" selects 4-byte slots and the word mnemonics; anything
# else is rejected.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;	# offset of the link register save slot
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;	# offset of the link register save slot
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Directory part of the script path including the trailing separator, or
# an empty string when the script was started without one.  Taking $1 only
# after a successful match avoids picking up a stale capture.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# The translator is looked for next to this script first, then in
# ../../perlasm relative to it.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything printed to STDOUT is piped through the
# translator; the next command-line argument is appended to its command
# line verbatim.  Low-precedence "or" is essential: with "||" the die
# attaches to the command string, which is always true, so a failed open
# would never be reported.
open(STDOUT,"| $^X $xlate $flavour ".shift(@ARGV)) or die "can't call $xlate: $!";
| 47 | |
# Stack frame layout, counted in $SIZE_T-sized slots plus a 64-byte
# scratch buffer.  The buffer starts at offset $LOCALS; the 18 slots above
# it hold the saved input pointer and registers r15..r31.
$LOCALS	= 6*$SIZE_T;
$FRAME	= 24*$SIZE_T+64;

# Fixed-purpose registers: round constant, stack pointer, TOC, and the
# three arguments (context, input pointer, block count).
($K,$sp,$toc,$ctx,$inp,$num) = map { "r$_" } (0..5);

# Scratch registers used while computing the round function.
($t1,$t0) = ("r6","r15");

# Working variables; $T is the sixth register that lets the roles rotate
# from round to round without moving data.
($A,$B,$C,$D,$E,$T) = map { "r$_" } (7..12);
@V = ($A,$B,$C,$D,$E,$T);

# Sixteen-word message schedule kept entirely in r16..r31.
@X = map { "r$_" } (16..31);
| 70 | |
# Append the code for one of rounds 0..19 to $code.
#
# Arguments: round index, the registers currently holding a, b, c, d, e,
# and the register that receives the new "a" (K + e + X[i] + rotl(a,5) + F).
# The round function is F = (b & c) | (~b & d).  The register passed as e
# is reused as scratch for rotl(a,5) and b is rotated left by 30 in place.
#
# Rounds 0..14 also fetch the next message word from the input block (round
# 0 additionally fetches word 0); from round 15 on the next schedule word
# is instead derived from earlier ones and rotated left by one.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $nxt=$i+1;
my ($Xcur,$Xnxt)=($X[$i%16],$X[$nxt%16]);

    if ($i<15) {
	# very first round has to prime X[0] itself
	$code.="\tlwz\t$Xcur,`$i*4`($inp)\n" if ($i==0);
	$code.=<<___;
	lwz	$Xnxt,`$nxt*4`($inp)
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,$Xcur
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
    } else {
	# schedule words mixed into X[nxt], all indices taken modulo 16
	my ($Xp2,$Xp8,$Xp13)=map { $X[($nxt+$_)%16] } (2,8,13);
	$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	$Xnxt,$Xnxt,$Xp2
	add	$f,$f,$Xcur
	and	$t0,$c,$b
	xor	$Xnxt,$Xnxt,$Xp8
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	$Xnxt,$Xnxt,$Xp13
	add	$f,$f,$t0
	rotlwi	$Xnxt,$Xnxt,1
___
    }
}
| 105 | |
# Append the code for one round that uses F = b ^ c ^ d to $code; the
# driver calls it for rounds 20..39 and again for rounds 60..79.
#
# Arguments: round index, the registers currently holding a, b, c, d, e,
# and the register that receives the new "a" (K + e + X[i] + rotl(a,5) + F).
# The register passed as e is reused as scratch for rotl(a,5) and b is
# rotated left by 30 in place.
#
# Every round before 79 also derives the next schedule word.  Round 79 has
# no next word to prepare, so those slots are used to load the five context
# words from $ctx into r16..r20 instead.  Any other index emits nothing.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $nxt=$i+1;
my $Xcur=$X[$i%16];

    if ($i<79) {
	my $Xnxt=$X[$nxt%16];
	my ($Xp2,$Xp8,$Xp13)=map { $X[($nxt+$_)%16] } (2,8,13);
	$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	$Xnxt,$Xnxt,$Xp2
	add	$f,$f,$Xcur
	xor	$t0,$b,$c
	xor	$Xnxt,$Xnxt,$Xp8
	add	$f,$f,$e
	rotlwi	$b,$b,30
	xor	$t0,$t0,$d
	xor	$Xnxt,$Xnxt,$Xp13
	add	$f,$f,$t0
	rotlwi	$Xnxt,$Xnxt,1
___
    } elsif ($i==79) {
	$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,$Xcur
	xor	$t0,$b,$c
	lwz	r17,4($ctx)
	add	$f,$f,$e
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	xor	$t0,$t0,$d
	lwz	r19,12($ctx)
	add	$f,$f,$t0
	lwz	r20,16($ctx)
___
    }
}
| 139 | |
# Append the code for one of rounds 40..59 to $code.
#
# Arguments: round index, the registers currently holding a, b, c, d, e,
# and the register that receives the new "a" (K + e + X[i] + rotl(a,5) + F).
# The round function is F = (b & c) | ((b | c) & d).  The register passed
# as e is reused as scratch for rotl(a,5) and b is rotated left by 30 in
# place.  The next schedule word is derived in between.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $nxt=$i+1;
my ($Xcur,$Xnxt)=($X[$i%16],$X[$nxt%16]);
# schedule words mixed into X[nxt], all indices taken modulo 16
my ($Xp2,$Xp8,$Xp13)=map { $X[($nxt+$_)%16] } (2,8,13);

$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	$Xnxt,$Xnxt,$Xp2
	add	$f,$f,$Xcur
	and	$t0,$b,$c
	xor	$Xnxt,$Xnxt,$Xp8
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	$Xnxt,$Xnxt,$Xp13
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	$Xnxt,$Xnxt,1
	add	$f,$f,$t0
___
}
| 160 | |
# Public entry point .sha1_block_data_order(ctx, inp, num).
#
# Registers r15..r31 occupy the top 17 slots of the frame, rN being kept
# at FRAME-SIZE_T*(32-N).  The save and restore sequences are therefore
# generated rather than spelled out.  The backticked offsets are left as
# expressions here, exactly as a literal listing would leave them.
my $nv_slot    = sub { my $n=shift; "r$n,`$FRAME-$SIZE_T*".(32-$n)."`($sp)" };
my $save_nv    = join("", map { "\t$PUSH\t".$nv_slot->($_)."\n" } (15..31));
my $restore_nv = join("", map { "\t$POP\t".$nv_slot->($_)."\n" } (15..31));

# Prologue: allocate the frame and pick up the return address.
$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
___
$code.=$save_nv;

# Load the five context words, then dispatch on input alignment.  Aligned
# input is hashed in one call; unaligned input is hashed directly up to
# the next page boundary and the block straddling the boundary is copied
# into the frame first.
$code.=<<___;
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble-	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne-	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
___
$code.=$restore_nv;

# Epilogue: restore the return address, release the frame and return.
$code.=<<___;
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___
| 269 | |
| 270 # This is private block function, which uses tailored calling | |
| 271 # interface, namely upon entry SHA_CTX is pre-loaded to given | |
| 272 # registers and counter register contains amount of chunks to | |
| 273 # digest... | |
# Body of the private block routine.  The 80 rounds are emitted in four
# stages of 20; each stage first loads its round constant into $K (upper
# half with lis, lower half with ori) and then unrolls its rounds with the
# listed generator.
my @stages = (
	# [ first round of next stage, K upper half, K lower half, generator ]
	[ 20, "0x5a82", "0x7999", \&BODY_00_19 ],	# K_00_19
	[ 40, "0x6ed9", "0xeba1", \&BODY_20_39 ],	# K_20_39
	[ 60, "0x8f1b", "0xbcdc", \&BODY_40_59 ],	# K_40_59
	[ 80, "0xca62", "0xc1d6", \&BODY_20_39 ],	# K_60_79
);

$code.=".align\t4\nLsha1_block_private:\n";

$i=0;
foreach my $stage (@stages) {
	my ($end,$k_hi,$k_lo,$body)=@$stage;

	$code.="\tlis\t$K,$k_hi\n\tori\t$K,$K,$k_lo\n";

	for (;$i<$end;$i++) {
		$body->($i,@V);
		# rotate the register roles instead of moving values around
		unshift(@V,pop(@V));
	}
}

# Round 79 left the previous context words in r16..r20.  Add the working
# variables to them, store the result back and reload it as the starting
# state for the next block, then advance the input pointer by one 64-byte
# block and loop while the count register is non-zero.
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz-	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
# Identification string embedded in the generated object.
$code.=<<___;
.asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

# Replace every `expression` left in the generated text with its value.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# STDOUT may be a pipe to a translator process, so failures have to be
# checked explicitly.  On a piped handle close() also reports a non-zero
# exit of the child, in which case $! is 0 and the status is in $?.
# Without these checks a truncated or missing output file would look like
# success to the build.
print $code or die "error writing output: $!";
close STDOUT or die "error closing STDOUT: $! (exit status $?)";
| OLD | NEW |