Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(226)

Side by Side Diff: openssl/crypto/sha/asm/sha512-parisc.pl

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/sha/asm/sha512-mips.pl ('k') | openssl/crypto/sha/asm/sha512-ppc.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # SHA256/512 block procedure for PA-RISC.
11
12 # June 2009.
13 #
14 # SHA256 performance is >75% better than gcc 3.2 generated code on
15 # PA-7100LC. Compared to code generated by vendor compiler this
16 # implementation is almost 70% faster in 64-bit build, but delivers
17 # virtually same performance in 32-bit build on PA-8600.
18 #
19 # SHA512 performance is >2.9x better than gcc 3.2 generated code on
20 # PA-7100LC, PA-RISC 1.1 processor. The implementation then detects if the
21 # code is executed on a PA-RISC 2.0 processor and switches to a 64-bit
22 # code path delivering adequate performance even in "blended" 32-bit
23 # build. Though 64-bit code is not any faster than code generated by
24 # vendor compiler on PA-8600...
25 #
26 # Special thanks to polarhome.com for providing HP-UX account.
27
28 $flavour = shift; # first argument: a value containing "64" selects the 64-bit settings below
29 $output = shift; # second argument: output file; a name containing "512" selects SHA512
30 open STDOUT,">",$output or die "can't open $output: $!"; # three-argument open; stop instead of silently emitting nothing
31
32 if ($flavour =~ /64/) { # 64-bit settings
33 $LEVEL ="2.0W"; # operand of the .LEVEL directive
34 $SIZE_T =8; # bytes per saved-register slot
35 $FRAME_MARKER =80; # bytes added to the frame for the frame marker
36 $SAVED_RP =16; # the prologue stores %r2 at -$SAVED_RP(%sp)
37 $PUSH ="std"; # store used to save registers and arguments
38 $PUSHMA ="std,ma"; # store used for the first save, of %r3 at $FRAME(%sp)
39 $POP ="ldd"; # load used to restore registers and arguments
40 $POPMB ="ldd,mb"; # load used for the final restore, of %r3 from -$FRAME(%sp)
41 } else { # every other flavour: 32-bit settings, same roles as above
42 $LEVEL ="1.0";
43 $SIZE_T =4;
44 $FRAME_MARKER =48;
45 $SAVED_RP =20;
46 $PUSH ="stw";
47 $PUSHMA ="stwm";
48 $POP ="ldw";
49 $POPMB ="ldwm";
50 }
51
52 if ($output =~ /512/) { # SHA512 variant, chosen from the output file name
53 $func="sha512_block_data_order"; # exported symbol name
54 $SZ=8; # bytes per word of context, input and table
55 @Sigma0=(28,34,39); # rotate amounts used for Sigma0(a)
56 @Sigma1=(14,18,41); # rotate amounts used for Sigma1(e)
57 @sigma0=(1, 8, 7); # sigma0: two rotate amounts, then a shift amount
58 @sigma1=(19,61, 6); # sigma1: two rotate amounts, then a shift amount
59 $rounds=80; # round count; used to rewind $Tbl after a block
60 $LAST10BITS=0x017; # low 10 bits of the last table word, used as end marker
61 $LD="ldd"; # load of one $SZ-byte word
62 $LDM="ldd,ma"; # load used to step through the table
63 $ST="std"; # store of one $SZ-byte word
64 } else { # SHA256 variant, same roles as above
65 $func="sha256_block_data_order";
66 $SZ=4;
67 @Sigma0=( 2,13,22);
68 @Sigma1=( 6,11,25);
69 @sigma0=( 7,18, 3);
70 @sigma1=(17,19,10);
71 $rounds=64;
72 $LAST10BITS=0x0f2;
73 $LD="ldw";
74 $LDM="ldwm";
75 $ST="stw";
76 }
77
78 $FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
79 # [+ argument transfer]
80 $XOFF=16*$SZ+32; # local variables
81 $FRAME+=$XOFF;
82 $XOFF+=$FRAME_MARKER; # distance between %sp and local variables
83
84 $ctx="%r26"; # zapped by $a0
85 $inp="%r25"; # zapped by $a1
86 $num="%r24"; # zapped by $t0
87
88 $a0 ="%r26"; # $a0,$a1,$t0,$t1: scratch registers used by the round code
89 $a1 ="%r25";
90 $t0 ="%r24";
91 $t1 ="%r29";
92 $Tbl="%r31"; # pointer into the constant table
93
94 @V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28"); # working variables a..h; names must not contain whitespace, the post-processing regexes match operands with \S*
95
96 @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
97 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp); # 16 schedule words; the 17th entry is only used while aligning input
98
99 sub ROUND_00_15 { # append the assembly for one round to $code; arguments are the round index and the registers playing a..h
100 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; # $i selects @X[$i%16]; the table load below is emitted only when $i<15
101 $code.=<<___;
102 _ror $e,$Sigma1[0],$a0
103 and $f,$e,$t0
104 _ror $e,$Sigma1[1],$a1
105 addl $t1,$h,$h
106 andcm $g,$e,$t1
107 xor $a1,$a0,$a0
108 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
109 or $t0,$t1,$t1 ; Ch(e,f,g)
110 addl @X[$i%16],$h,$h
111 xor $a0,$a1,$a1 ; Sigma1(e)
112 addl $t1,$h,$h
113 _ror $a,$Sigma0[0],$a0
114 addl $a1,$h,$h
115
116 _ror $a,$Sigma0[1],$a1
117 and $a,$b,$t0
118 and $a,$c,$t1
119 xor $a1,$a0,$a0
120 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
121 xor $t1,$t0,$t0
122 and $b,$c,$t1
123 xor $a0,$a1,$a1 ; Sigma0(a)
124 addl $h,$d,$d
125 xor $t1,$t0,$t0 ; Maj(a,b,c)
126 `"$LDM $SZ($Tbl),$t1" if ($i<15)`
127 addl $a1,$h,$h
128 addl $t0,$h,$h
129
130 ___
131 }
132
133 sub ROUND_16_xx { # update one schedule word in @X, then emit a regular round via ROUND_00_15
134 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
135 $i-=16; # index into the @X registers
136 $code.=<<___;
137 _ror @X[($i+1)%16],$sigma0[0],$a0
138 _ror @X[($i+1)%16],$sigma0[1],$a1
139 addl @X[($i+9)%16],@X[$i],@X[$i]
140 _ror @X[($i+14)%16],$sigma1[0],$t0
141 _ror @X[($i+14)%16],$sigma1[1],$t1
142 xor $a1,$a0,$a0
143 _shr @X[($i+1)%16],$sigma0[2],$a1
144 xor $t1,$t0,$t0
145 _shr @X[($i+14)%16],$sigma1[2],$t1
146 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
147 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
148 $LDM $SZ($Tbl),$t1
149 addl $a0,@X[$i],@X[$i]
150 addl $t0,@X[$i],@X[$i]
151 ___
152 $code.=<<___ if ($i==15); # last round of a pass: compare the low 10 bits of the table word just loaded with $LAST10BITS
153 extru $t1,31,10,$a1
154 comiclr,<> $LAST10BITS,$a1,%r0
155 ldo 1($Tbl),$Tbl ; signal end of $Tbl
156 ___
157 &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); # $i+16 is never below 15, so ROUND_00_15 emits no table load of its own
158 }
159
160 $code=<<___;
161 .LEVEL $LEVEL
162 .SPACE \$TEXT\$
163 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
164
165 .ALIGN 64
166 L\$table
167 ___
168 $code.=<<___ if ($SZ==8);
169 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
170 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
171 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
172 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
173 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
174 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
175 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
176 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
177 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
178 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
179 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
180 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
181 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
182 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
183 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
184 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
185 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
186 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
187 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
188 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
189 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
190 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
191 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
192 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
193 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
194 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
195 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
196 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
197 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
198 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
199 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
200 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
201 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
202 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
203 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
204 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
205 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
206 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
207 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
208 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
209 ___
210 $code.=<<___ if ($SZ==4);
211 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
212 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
213 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
214 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
215 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
216 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
217 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
218 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
219 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
220 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
221 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
222 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
223 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
224 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
225 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
226 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
227 ___
228 $code.=<<___;
229
230 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
231 .ALIGN 64
232 $func
233 .PROC
234 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
235 .ENTRY
236 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
237 $PUSHMA %r3,$FRAME(%sp)
238 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
239 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
240 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
241 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
242 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
243 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
244 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
245 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
246 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
247 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
248 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
249 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
250 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
251 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
252 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
253
254 _shl $num,`log(16*$SZ)/log(2)`,$num
255 addl $inp,$num,$num ; $num to point at the end of $inp
256
257 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
258 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
259 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
260
261 blr %r0,$Tbl
262 ldi 3,$t1
263 L\$pic
264 andcm $Tbl,$t1,$Tbl ; wipe privilege level
265 ldo L\$table-L\$pic($Tbl),$Tbl
266 ___
267 $code.=<<___ if ($SZ==8 && $SIZE_T==4);
268 ldi 31,$t1
269 mtctl $t1,%cr11
270 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
271 b L\$parisc1
272 nop
273 ___
274 $code.=<<___;
275 $LD `0*$SZ`($ctx),$A ; load context
276 $LD `1*$SZ`($ctx),$B
277 $LD `2*$SZ`($ctx),$C
278 $LD `3*$SZ`($ctx),$D
279 $LD `4*$SZ`($ctx),$E
280 $LD `5*$SZ`($ctx),$F
281 $LD `6*$SZ`($ctx),$G
282 $LD `7*$SZ`($ctx),$H
283
284 extru $inp,31,`log($SZ)/log(2)`,$t0
285 sh3addl $t0,%r0,$t0
286 subi `8*$SZ`,$t0,$t0
287 mtctl $t0,%cr11 ; load %sar with align factor
288
289 L\$oop
290 ldi `$SZ-1`,$t0
291 $LDM $SZ($Tbl),$t1
292 andcm $inp,$t0,$t0 ; align $inp
293 ___
294 for ($i=0;$i<15;$i++) { # load input block
295 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; } # words 0..14; words 15 and 16 are loaded in the template that follows
296 $code.=<<___;
297 cmpb,*= $inp,$t0,L\$aligned
298 $LD `$SZ*15`($t0),@X[15]
299 $LD `$SZ*16`($t0),@X[16]
300 ___
301 for ($i=0;$i<16;$i++) { # align data
302 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; } # each word is combined with its successor, so the 17th entry @X[16] is needed
303 $code.=<<___;
304 L\$aligned
305 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
306 ___
307
308 for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } # moving the last register name to the front shifts the a..h roles by one for the next round
309 $code.=<<___;
310 L\$rounds
311 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
312 ___
313 for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } # $i continues from 16; these 16 rounds follow the L$rounds label
314 $code.=<<___;
315 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
316 nop
317
318 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
319 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
320 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
321 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
322
323 $LD `0*$SZ`($ctx),@X[0] ; load context
324 $LD `1*$SZ`($ctx),@X[1]
325 $LD `2*$SZ`($ctx),@X[2]
326 $LD `3*$SZ`($ctx),@X[3]
327 $LD `4*$SZ`($ctx),@X[4]
328 $LD `5*$SZ`($ctx),@X[5]
329 addl @X[0],$A,$A
330 $LD `6*$SZ`($ctx),@X[6]
331 addl @X[1],$B,$B
332 $LD `7*$SZ`($ctx),@X[7]
333 ldo `16*$SZ`($inp),$inp ; advance $inp
334
335 $ST $A,`0*$SZ`($ctx) ; save context
336 addl @X[2],$C,$C
337 $ST $B,`1*$SZ`($ctx)
338 addl @X[3],$D,$D
339 $ST $C,`2*$SZ`($ctx)
340 addl @X[4],$E,$E
341 $ST $D,`3*$SZ`($ctx)
342 addl @X[5],$F,$F
343 $ST $E,`4*$SZ`($ctx)
344 addl @X[6],$G,$G
345 $ST $F,`5*$SZ`($ctx)
346 addl @X[7],$H,$H
347 $ST $G,`6*$SZ`($ctx)
348 $ST $H,`7*$SZ`($ctx)
349
350 cmpb,*<>,n $inp,$num,L\$oop
351 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
352 ___
353 if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
354 {{
355 $code.=<<___;
356 b L\$done
357 nop
358
359 .ALIGN 64
360 L\$parisc1
361 ___
362
363 @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
364 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
365 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
366 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
367 $a0 ="%r17";
368 $a1 ="%r18";
369 $a2 ="%r19";
370 $a3 ="%r20";
371 $t0 ="%r21";
372 $t1 ="%r22";
373 $t2 ="%r28";
374 $t3 ="%r29";
375 $Tbl="%r31";
376
377 @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
378
379 sub ROUND_00_15_pa1 { # one round for the 32-bit path: each of a..h is passed as a hi/lo pair of register names
380 my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
381 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_; # $flag is set when called from ROUND_16_xx_pa1
382 my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; # pair holding X[i], and pair that receives X[i+1]
383
384 $code.=<<___ if (!$flag); # without $flag, X[i+1] is loaded here from the stack copy
385 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
386 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
387 ___
388 $code.=<<___;
389 shd $ehi,$elo,$Sigma1[0],$t0
390 add $Xlo,$hlo,$hlo
391 shd $elo,$ehi,$Sigma1[0],$t1
392 addc $Xhi,$hhi,$hhi ; h += X[i]
393 shd $ehi,$elo,$Sigma1[1],$t2
394 ldwm 8($Tbl),$Xhi
395 shd $elo,$ehi,$Sigma1[1],$t3
396 ldw -4($Tbl),$Xlo ; load K[i]
397 xor $t2,$t0,$t0
398 xor $t3,$t1,$t1
399 and $flo,$elo,$a0
400 and $fhi,$ehi,$a1
401 shd $ehi,$elo,$Sigma1[2],$t2
402 andcm $glo,$elo,$a2
403 shd $elo,$ehi,$Sigma1[2],$t3
404 andcm $ghi,$ehi,$a3
405 xor $t2,$t0,$t0
406 xor $t3,$t1,$t1 ; Sigma1(e)
407 add $Xlo,$hlo,$hlo
408 xor $a2,$a0,$a0
409 addc $Xhi,$hhi,$hhi ; h += K[i]
410 xor $a3,$a1,$a1 ; Ch(e,f,g)
411
412 add $t0,$hlo,$hlo
413 shd $ahi,$alo,$Sigma0[0],$t0
414 addc $t1,$hhi,$hhi ; h += Sigma1(e)
415 shd $alo,$ahi,$Sigma0[0],$t1
416 add $a0,$hlo,$hlo
417 shd $ahi,$alo,$Sigma0[1],$t2
418 addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
419 shd $alo,$ahi,$Sigma0[1],$t3
420
421 xor $t2,$t0,$t0
422 xor $t3,$t1,$t1
423 shd $ahi,$alo,$Sigma0[2],$t2
424 and $alo,$blo,$a0
425 shd $alo,$ahi,$Sigma0[2],$t3
426 and $ahi,$bhi,$a1
427 xor $t2,$t0,$t0
428 xor $t3,$t1,$t1 ; Sigma0(a)
429
430 and $alo,$clo,$a2
431 and $ahi,$chi,$a3
432 xor $a2,$a0,$a0
433 add $hlo,$dlo,$dlo
434 xor $a3,$a1,$a1
435 addc $hhi,$dhi,$dhi ; d += h
436 and $blo,$clo,$a2
437 add $t0,$hlo,$hlo
438 and $bhi,$chi,$a3
439 addc $t1,$hhi,$hhi ; h += Sigma0(a)
440 xor $a2,$a0,$a0
441 add $a0,$hlo,$hlo
442 xor $a3,$a1,$a1 ; Maj(a,b,c)
443 addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
444
445 ___
446 $code.=<<___ if ($i==15 && $flag); # end of a 16-round pass: branch back to L$rounds_pa1 unless the K word just loaded matches $LAST10BITS
447 extru $Xlo,31,10,$Xlo
448 comiclr,= $LAST10BITS,$Xlo,%r0
449 b L\$rounds_pa1
450 nop
451 ___
452 push(@X,shift(@X)); push(@X,shift(@X)); # rotate @X by two names so the X[i+1] pair is first for the next round
453 }
454
455 sub ROUND_16_xx_pa1 { # 32-bit path: update X[i], store it back to the stack copy, then emit the round
456 my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; # pair holding X[i], and pair that receives X[i+1]
457 my ($i)=shift; # the rest of @_ (the a..h register pairs) is passed on below
458 $i-=16; # index into the 16-entry stack copy
459 $code.=<<___;
460 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
461 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
462 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
463 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
464 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
465 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
466 shd $Xnhi,$Xnlo,$sigma0[0],$t0
467 shd $Xnlo,$Xnhi,$sigma0[0],$t1
468 add $a0,$Xlo,$Xlo
469 shd $Xnhi,$Xnlo,$sigma0[1],$t2
470 addc $a1,$Xhi,$Xhi
471 shd $Xnlo,$Xnhi,$sigma0[1],$t3
472 xor $t2,$t0,$t0
473 shd $Xnhi,$Xnlo,$sigma0[2],$t2
474 xor $t3,$t1,$t1
475 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
476 xor $t2,$t0,$t0
477 shd $a3,$a2,$sigma1[0],$a0
478 xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
479 shd $a2,$a3,$sigma1[0],$a1
480 add $t0,$Xlo,$Xlo
481 shd $a3,$a2,$sigma1[1],$t2
482 addc $t1,$Xhi,$Xhi
483 shd $a2,$a3,$sigma1[1],$t3
484 xor $t2,$a0,$a0
485 shd $a3,$a2,$sigma1[2],$t2
486 xor $t3,$a1,$a1
487 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
488 xor $t2,$a0,$a0
489 xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
490 add $a0,$Xlo,$Xlo
491 addc $a1,$Xhi,$Xhi
492
493 stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
494 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
495 ___
496 &ROUND_00_15_pa1($i,@_,1); # the trailing 1 is $flag: X[i+1] was already loaded above
497 }
498 $code.=<<___;
499 ldw `0*4`($ctx),$Ahi ; load context
500 ldw `1*4`($ctx),$Alo
501 ldw `2*4`($ctx),$Bhi
502 ldw `3*4`($ctx),$Blo
503 ldw `4*4`($ctx),$Chi
504 ldw `5*4`($ctx),$Clo
505 ldw `6*4`($ctx),$Dhi
506 ldw `7*4`($ctx),$Dlo
507 ldw `8*4`($ctx),$Ehi
508 ldw `9*4`($ctx),$Elo
509 ldw `10*4`($ctx),$Fhi
510 ldw `11*4`($ctx),$Flo
511 ldw `12*4`($ctx),$Ghi
512 ldw `13*4`($ctx),$Glo
513 ldw `14*4`($ctx),$Hhi
514 ldw `15*4`($ctx),$Hlo
515
516 extru $inp,31,2,$t0
517 sh3addl $t0,%r0,$t0
518 subi 32,$t0,$t0
519 mtctl $t0,%cr11 ; load %sar with align factor
520
521 L\$oop_pa1
522 extru $inp,31,2,$a3
523 comib,= 0,$a3,L\$aligned_pa1
524 sub $inp,$a3,$inp
525
526 ldw `0*4`($inp),$X[0]
527 ldw `1*4`($inp),$X[1]
528 ldw `2*4`($inp),$t2
529 ldw `3*4`($inp),$t3
530 ldw `4*4`($inp),$a0
531 ldw `5*4`($inp),$a1
532 ldw `6*4`($inp),$a2
533 ldw `7*4`($inp),$a3
534 vshd $X[0],$X[1],$X[0]
535 vshd $X[1],$t2,$X[1]
536 stw $X[0],`-$XOFF+0*4`(%sp)
537 ldw `8*4`($inp),$t0
538 vshd $t2,$t3,$t2
539 stw $X[1],`-$XOFF+1*4`(%sp)
540 ldw `9*4`($inp),$t1
541 vshd $t3,$a0,$t3
542 ___
543 {
544 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
545 for ($i=2;$i<=(128/4-8);$i++) {
546 $code.=<<___;
547 stw $t[0],`-$XOFF+$i*4`(%sp)
548 ldw `(8+$i)*4`($inp),$t[0]
549 vshd $t[1],$t[2],$t[1]
550 ___
551 push(@t,shift(@t));
552 }
553 for (;$i<(128/4-1);$i++) {
554 $code.=<<___;
555 stw $t[0],`-$XOFF+$i*4`(%sp)
556 vshd $t[1],$t[2],$t[1]
557 ___
558 push(@t,shift(@t));
559 }
560 $code.=<<___;
561 b L\$collected_pa1
562 stw $t[0],`-$XOFF+$i*4`(%sp)
563
564 ___
565 }
566 $code.=<<___;
567 L\$aligned_pa1
568 ldw `0*4`($inp),$X[0]
569 ldw `1*4`($inp),$X[1]
570 ldw `2*4`($inp),$t2
571 ldw `3*4`($inp),$t3
572 ldw `4*4`($inp),$a0
573 ldw `5*4`($inp),$a1
574 ldw `6*4`($inp),$a2
575 ldw `7*4`($inp),$a3
576 stw $X[0],`-$XOFF+0*4`(%sp)
577 ldw `8*4`($inp),$t0
578 stw $X[1],`-$XOFF+1*4`(%sp)
579 ldw `9*4`($inp),$t1
580 ___
581 {
582 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
583 for ($i=2;$i<(128/4-8);$i++) {
584 $code.=<<___;
585 stw $t[0],`-$XOFF+$i*4`(%sp)
586 ldw `(8+$i)*4`($inp),$t[0]
587 ___
588 push(@t,shift(@t));
589 }
590 for (;$i<128/4;$i++) {
591 $code.=<<___;
592 stw $t[0],`-$XOFF+$i*4`(%sp)
593 ___
594 push(@t,shift(@t));
595 }
596 $code.="L\$collected_pa1\n";
597 }
598
599 for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift( @V,pop(@V)); }
600 $code.="L\$rounds_pa1\n";
601 for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift( @V,pop(@V)); }
602
603 $code.=<<___;
604 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
605 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
606 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
607 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
608
609 ldw `0*4`($ctx),$t1 ; update context
610 ldw `1*4`($ctx),$t0
611 ldw `2*4`($ctx),$t3
612 ldw `3*4`($ctx),$t2
613 ldw `4*4`($ctx),$a1
614 ldw `5*4`($ctx),$a0
615 ldw `6*4`($ctx),$a3
616 add $t0,$Alo,$Alo
617 ldw `7*4`($ctx),$a2
618 addc $t1,$Ahi,$Ahi
619 ldw `8*4`($ctx),$t1
620 add $t2,$Blo,$Blo
621 ldw `9*4`($ctx),$t0
622 addc $t3,$Bhi,$Bhi
623 ldw `10*4`($ctx),$t3
624 add $a0,$Clo,$Clo
625 ldw `11*4`($ctx),$t2
626 addc $a1,$Chi,$Chi
627 ldw `12*4`($ctx),$a1
628 add $a2,$Dlo,$Dlo
629 ldw `13*4`($ctx),$a0
630 addc $a3,$Dhi,$Dhi
631 ldw `14*4`($ctx),$a3
632 add $t0,$Elo,$Elo
633 ldw `15*4`($ctx),$a2
634 addc $t1,$Ehi,$Ehi
635 stw $Ahi,`0*4`($ctx)
636 add $t2,$Flo,$Flo
637 stw $Alo,`1*4`($ctx)
638 addc $t3,$Fhi,$Fhi
639 stw $Bhi,`2*4`($ctx)
640 add $a0,$Glo,$Glo
641 stw $Blo,`3*4`($ctx)
642 addc $a1,$Ghi,$Ghi
643 stw $Chi,`4*4`($ctx)
644 add $a2,$Hlo,$Hlo
645 stw $Clo,`5*4`($ctx)
646 addc $a3,$Hhi,$Hhi
647 stw $Dhi,`6*4`($ctx)
648 ldo `16*$SZ`($inp),$inp ; advance $inp
649 stw $Dlo,`7*4`($ctx)
650 stw $Ehi,`8*4`($ctx)
651 stw $Elo,`9*4`($ctx)
652 stw $Fhi,`10*4`($ctx)
653 stw $Flo,`11*4`($ctx)
654 stw $Ghi,`12*4`($ctx)
655 stw $Glo,`13*4`($ctx)
656 stw $Hhi,`14*4`($ctx)
657 comb,= $inp,$num,L\$done
658 stw $Hlo,`15*4`($ctx)
659 b L\$oop_pa1
660 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
661 L\$done
662 ___
663 }}
664 $code.=<<___; # epilogue template: reload %r2 and %r4..%r18, return through %r2, restore %r3 last
665 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
666 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
667 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
668 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
669 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
670 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
671 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
672 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
673 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
674 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
675 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
676 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
677 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
678 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
679 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
680 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
681 bv (%r2)
682 .EXIT
683 $POPMB -$FRAME(%sp),%r3
684 .PROCEND
685 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
686 ___
687
688 # Explicitly encode PA-RISC 2.0 instructions used in this module, so
689 # that it can be compiled with .LEVEL 1.0. It should be noted that I
690 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
691 # directive...
692
693 my $ldd = sub { # encoder for "ldd": returns a .WORD line, or the instruction as text when the operands do not match
694 my ($mod,$args) = @_; # $mod: completer text such as ",ma"; $args: operand string
695 my $orig = "ldd$mod\t$args"; # original instruction, kept as a trailing comment on the .WORD line
696
697 if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
698 { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1); # $1 displacement, $2 base register, $3 target register
699 $opcode|=(1<<3) if ($mod =~ /^,m/); # any completer starting with ",m" sets bit 3
700 $opcode|=(1<<2) if ($mod =~ /^,mb/); # ",mb" additionally sets bit 2
701 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
702 }
703 else { "\t".$orig; } # operands not of the form disp(%rN),%rM: pass through unchanged
704 };
705
706 my $std = sub { # encoder for "std": returns a .WORD line, or the instruction as text when the operands do not match
707 my ($mod,$args) = @_; # $mod: completer text; it is not encoded here, only echoed in the comment
708 my $orig = "std$mod\t$args"; # original instruction, kept as a trailing comment on the .WORD line
709
710 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
711 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); # $1 source register, $2 displacement, $3 base register
712 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
713 }
714 else { "\t".$orig; } # operands not of the form %rN,disp(%rM): pass through unchanged
715 };
716
717 my $extrd = sub { # encoder for "extrd": handles an immediate-position form and a %sar form
718 my ($mod,$args) = @_; # $mod: completer text; $args: operand string
719 my $orig = "extrd$mod\t$args"; # original instruction, kept as a trailing comment on the .WORD line
720
721 # I only have ",u" completer, it's implicitly encoded...
722 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
723 { my $opcode=(0x36<<26)|($1<<21)|($4<<16); # $1 source register, $2 position, $3 length, $4 target register
724 my $len=32-$3;
725 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
726 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
727 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
728 }
729 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
730 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); # $1 source register, $2 length, $3 target register
731 my $len=32-$2;
732 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
733 $opcode |= (1<<13) if ($mod =~ /,\**=/); # completer contains ",=" or ",*=": set bit 13
734 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
735 }
736 else { "\t".$orig; } # neither operand form matched: pass through unchanged
737 };
738
739 my $shrpd = sub { # encoder for "shrpd": handles an immediate shift amount and a %sar shift amount
740 my ($mod,$args) = @_; # $mod: completer text, only echoed in the comment; $args: operand string
741 my $orig = "shrpd$mod\t$args"; # original instruction, kept as a trailing comment on the .WORD line
742
743 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
744 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; # $1,$2 source registers, $3 shift amount, $4 target register
745 my $cpos=63-$3;
746 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
747 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
748 }
749 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
750 { sprintf "\t.WORD\t0x%08x\t; %s",
751 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; # $1,$2 source registers, $3 target register
752 }
753 else { "\t".$orig; } # neither operand form matched: pass through unchanged
754 };
755
756 sub assemble { # return the hand-encoded form of one instruction when an encoder exists, else the instruction as text
757 my ($mnemonic,$mod,$args)=@_; # mnemonic, completer text, operand string
758 my $opcode = eval("\$$mnemonic"); # string eval fetches the variable named after the mnemonic, e.g. $ldd or $std
759
760 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; # only code references are treated as encoders
761 }
762
763 foreach (split("\n",$code)) { # post-process the generated text one line at a time
764 s/\`([^\`]*)\`/eval $1/ge; # evaluate the Perl expressions embedded between backticks in the templates
765
766 s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
767 $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
768 : sprintf("shd\t%$1,%$2,%d",$3)/e or
769 # translate made up instructions: _ror, _shr, _align, _shl
770 s/_ror(\s+)(%r[0-9]+),/
771 ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
772
773 s/_shr(\s+%r[0-9]+),([0-9]+),/
774 $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
775 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
776
777 s/_align(\s+%r[0-9]+,%r[0-9]+),/
778 ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
779
780 s/_shl(\s+%r[0-9]+),([0-9]+),/
781 $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
782 : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
783
784 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4); # 32-bit builds: hand-encode instructions that have an encoder
785
786 s/cmpb,\*/comb,/ if ($SIZE_T==4); # 32-bit builds: rewrite cmpb,* as comb,
787
788 print $_,"\n";
789 }
790
791 close STDOUT or die "error closing STDOUT: $!"; # buffered write errors only surface at close
OLDNEW
« no previous file with comments | « openssl/crypto/sha/asm/sha512-mips.pl ('k') | openssl/crypto/sha/asm/sha512-ppc.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698