OLD | NEW |
| (Empty) |
1 #!/usr/bin/env perl | |
2 | |
3 # ==================================================================== | |
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
5 # project. The module is, however, dual licensed under OpenSSL and | |
6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 # details see http://www.openssl.org/~appro/cryptogams/. | |
8 # ==================================================================== | |
9 | |
10 # SHA1 block procedure for PA-RISC. | |
11 | |
12 # June 2009. | |
13 # | |
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in a 64-bit build and just
# a few percent faster in a 32-bit one (this is for aligned input; data
# for unaligned input is not available).
19 # | |
20 # Special thanks to polarhome.com for providing HP-UX account. | |
21 | |
# Command line: <flavour> <output-file>.
# The flavour string is matched against /64/ further down to choose between
# the 64-bit and 32-bit parameter sets; the generated assembly is printed to
# STDOUT, which is redirected to the output file here.
$flavour = shift;
$output  = shift;

# Use the three-argument form so the file name is never parsed for a mode,
# and fail loudly rather than silently producing no output.  When no output
# file is given, the assembly goes to the inherited STDOUT.
if (defined($output) && length($output)) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
25 | |
# ABI-dependent parameters.  A flavour containing "64" selects the 64-bit
# set, anything else the 32-bit set.
#   $LEVEL         argument of the .LEVEL directive
#   $SIZE_T        size in bytes of one saved-register slot
#   $FRAME_MARKER  size in bytes of the frame marker area
#   $SAVED_RP      distance below %sp at which %r2 is stored on entry
#   $PUSH/$POP     store/load mnemonics for one register slot
#   $PUSHMA/$POPMB store/load mnemonics that also modify the base register
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]

# Incoming arguments.
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

# Scratch registers and the register holding the current round constant.
$t0="%r28";
$t1="%r29";
$K="%r31";

# Message schedule registers.  Entries 0..15 form the circular schedule
# indexed modulo 16 by the round bodies; the 17th entry aliases $t0 and is
# only used as the extra input word when re-aligning an unaligned block.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; @V is rotated by the caller after every round.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");
60 | |
# Append the assembly for one of rounds 0..19 to the global $code.
#
#   $i              round number (also emitted as an assembly comment)
#   $a,$b,$c,$d,$e  names of the registers holding the working variables
#
# The emitted instructions add $K, $a rotated via "shd ...,27", the schedule
# word for round $i and (b & c) | (d & ~b) into $e, and rotate $b via
# "shd ...,2".  Up to round 14 the schedule word is taken from @X as loaded.
# From round 15 on, the update of the word needed by round $i+1
# (X[j] ^= X[j+2] ^ X[j+8] ^ X[j+13], then "shd ...,31", all indices mod 16)
# is interleaved with the round itself.
sub BODY_00_19 {
	my ($i,$a,$b,$c,$d,$e)=@_;
	my $j=$i+1;	# round whose schedule word is prepared in advance

	if ($i<15) {
		$code.=<<___;
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
	} else {	# with forward Xupdate
		$code.=<<___;
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
	}
}
91 | |
# Append the assembly for one parity round (b ^ c ^ d) to the global $code.
# The driver uses this for rounds 20..39 and again for rounds 60..79.
#
#   $i              round number (also emitted as an assembly comment)
#   $a,$b,$c,$d,$e  names of the registers holding the working variables
#
# For every round before 79 the update of the schedule word needed by round
# $i+1 is interleaved with the round.  Round 79 needs no further schedule
# word, so those slots instead load the five context words from $ctx into
# @X[0..4], ready to be added to the working variables by the caller.
sub BODY_20_39 {
	my ($i,$a,$b,$c,$d,$e)=@_;
	my $j=$i+1;	# round whose schedule word is prepared in advance

	if ($i<79) {
		$code.=<<___;
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
	} elsif ($i==79) {	# with context load
		$code.=<<___;
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
	}
}
125 | |
# Append the assembly for one of rounds 40..59 to the global $code.
#
#   $i              round number (also emitted as an assembly comment)
#   $a,$b,$c,$d,$e  names of the registers holding the working variables
#
# The round function is accumulated into $e as two separate terms,
# b & (c ^ d) and c & d.  The update of the schedule word needed by round
# $i+1 is interleaved with the round.  $t1 first carries the rotated $a and
# is reused for the c & d term once that has been added.
sub BODY_40_59 {
	my ($i,$a,$b,$c,$d,$e)=@_;
	my $j=$i+1;	# round whose schedule word is prepared in advance

	$code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}
146 | |
# ---------------------------------------------------------------------
# Assemble the body of sha1_block_data_order(ctx, inp, num) into $code.
#
# Text between backticks is a Perl expression that is evaluated once the
# whole listing has been built, and the ",*" completers are removed for the
# 32-bit flavour; both happen in the post-processing step at the end of the
# script.
#
# Prologue: %r2 is stored $SAVED_RP bytes below the incoming %sp, then
# %r3..%r16 are stored in the $FRAME bytes allocated by the $PUSHMA store.
# The five chaining words are loaded from $ctx, and the shift-amount
# register is set to 32-8*(inp&3) for the optional re-alignment below.
# ---------------------------------------------------------------------
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
	# Words 0..14 are loaded from the word-aligned address in $t0.
	for ($i=0;$i<15;$i++) {	# load input block
	$code.="\tldw	`4*$i`($t0),@X[$i]\n";		}
# Word 15 is loaded by the instruction that follows the compare-and-branch.
# The 17th word is only loaded on the fall-through (unaligned) path, where
# it feeds the last of the variable shifts.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___
	# Combine each pair of neighbouring words using the shift amount set
	# up before the loop.  This overwrites @X[0..15]; @X[16] is $t0.
	for ($i=0;$i<16;$i++) {	# align input
	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";	}
$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# The global $i deliberately carries over from one loop to the next.  @V is
# rotated after every round so the register roles shift without any moves.
for ($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for (;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Round 79 has already loaded the previous chaining words into @X[0..4].
# Add them to the working variables, store the result back to $ctx and loop
# while $num, decremented here, is non-zero; $inp is advanced by 64 by the
# instruction that follows the branch.  The epilogue then reloads %r2 and
# %r4..%r16, and the final $POPMB restores %r3 and releases the frame.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
255 | |
# Post-process the listing and write it out.

# Replace every `expr` placeholder with the value of the Perl expression
# between the backticks (frame offsets, load displacements).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# The ",*" completers are only kept for the 64-bit flavour; for the 32-bit
# flavour reduce them to a plain ",".
$code =~ s/,\*/,/gm	if ($SIZE_T==4);

# Buffered write errors (full disk, closed pipe) may only surface at close,
# so check both calls rather than leaving a truncated file behind silently.
print STDOUT $code	or die "error writing assembly output: $!";
close STDOUT		or die "error closing STDOUT: $!";
OLD | NEW |