OLD | NEW |
| (Empty) |
1 #!/usr/bin/env perl | |
2 | |
3 # ==================================================================== | |
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
5 # project. The module is, however, dual licensed under OpenSSL and | |
6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 # details see http://www.openssl.org/~appro/cryptogams/. | |
8 # ==================================================================== | |
9 | |
10 # SHA1 block procedure for Alpha. | |
11 | |
12 # On 21264 performance is 33% better than code generated by vendor | |
13 # compiler, and 75% better than GCC [3.4], and in absolute terms is | |
14 # 8.7 cycles per processed byte. Implementation features vectorized | |
15 # byte swap, but not Xupdate. | |
16 | |
# Register-name assignments. Each value is the textual name of an Alpha
# register; the names are interpolated into the assembly templates below.
#
#   @X         sixteen registers ($0..$15) used as the 16-entry X[] array:
#              filled from the input block in BODY_00_19 and updated in
#              place by the Xupdate xor sequences in later rounds.
#   $ctx       pointer to the five 32-bit state words (read with ldl at
#              offsets 0..16, written back with stl after each block).
#   $inp       input pointer, advanced by 64 after each block.
#   $num       block count on entry; the prologue turns it into an end
#              pointer ($inp + ($num << 6)).
#   $A..$E     the five working variables; @V lists their register names
#              and is rotated after every round by the driver loops.
#   $t0..$t3   scratch registers. $t2 is "ra", which the prologue saves on
#              the stack and the epilogue restores before "ret (ra)".
#   $K         holds the current round constant; it is "AT", and the
#              generated code sets ".set noat".
17 @X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", | |
18 "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15"); | |
19 $ctx="a0"; # $16 | |
20 $inp="a1"; | |
21 $num="a2"; | |
22 $A="a3"; | |
23 $B="a4"; # 20 | |
24 $C="a5"; | |
25 $D="t8"; | |
26 $E="t9"; @V=($A,$B,$C,$D,$E); | |
27 $t0="t10"; # 24 | |
28 $t1="t11"; | |
29 $t2="ra"; | |
30 $t3="t12"; | |
31 $K="AT"; # 28 | |
32 | |
# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Appends to the global $code the assembly for one round in the range the
# driver calls it for (0..19).
#   $i        round index; selects which of the templates below are emitted.
#   $a..$e    register names holding the five working variables for this
#             round (the driver rotates them between calls).
# Nothing useful is returned; the only effect is the text appended to $code.
#
# All variants compute the same round function: (b AND c) OR (d AND NOT b),
# built from "and $b,$c" plus "bic $d,$b" and or-ed together. They add
# $K, X[$i], (a << 5) and (a >> 27) into $e (the two shifts together form
# the rotate of $a by 5, $a having been truncated to 32 bits with
# "zapnot ...,0xf"), and rotate $b using sll 30 / srl 32 / or.
33 sub BODY_00_19 { | |
34 my ($i,$a,$b,$c,$d,$e)=@_; | |
# $j indexes the X[] entry that the $i>=15 variant computes one round ahead.
35 my $j=$i+1; | |
# Round 0 only: issue the two unaligned quadword loads (offsets 0 and 7
# from $inp) that cover the first eight input bytes.
36 $code.=<<___ if ($i==0); | |
37 ldq_u @X[0],0+0($inp) | |
38 ldq_u @X[1],0+7($inp) | |
39 ___ | |
# Even rounds below 14: start the loads for the next pair of words,
# X[$i+2]/X[$i+3]. The offset expression "($i+2)*4+0" is not evaluated by
# Perl beyond interpolating $i; it is emitted as text for the assembler.
40 $code.=<<___ if (!($i&1) && $i<14); | |
41 ldq_u @X[$i+2],($i+2)*4+0($inp) | |
42 ldq_u @X[$i+3],($i+2)*4+7($inp) | |
43 ___ | |
# Even rounds 0..14: extql/extqh/or merge the two previously loaded
# quadwords into X[$i], which then holds two 32-bit input words. The
# srl/sll by 8 and 24 combined with zapnot masks 0x11/0x22/0x44/0x88
# reverse the byte order of both 32-bit halves at once. The round
# arithmetic is interleaved with that. "extll X[$i],4" copies the upper
# word into X[$i+1] for the following odd round, and "zapnot X[$i],0xf"
# leaves only the low word in X[$i].
44 $code.=<<___ if (!($i&1) && $i<15); | |
45 extql @X[$i],$inp,@X[$i] | |
46 extqh @X[$i+1],$inp,@X[$i+1] | |
47 | |
48 or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched | |
49 | |
50 srl @X[$i],24,$t0 # vectorized byte swap | |
51 srl @X[$i],8,$t2 | |
52 | |
53 sll @X[$i],8,$t3 | |
54 sll @X[$i],24,@X[$i] | |
55 zapnot $t0,0x11,$t0 | |
56 zapnot $t2,0x22,$t2 | |
57 | |
58 zapnot @X[$i],0x88,@X[$i] | |
59 or $t0,$t2,$t0 | |
60 zapnot $t3,0x44,$t3 | |
61 sll $a,5,$t1 | |
62 | |
63 or @X[$i],$t0,@X[$i] | |
64 addl $K,$e,$e | |
65 and $b,$c,$t2 | |
66 zapnot $a,0xf,$a | |
67 | |
68 or @X[$i],$t3,@X[$i] | |
69 srl $a,27,$t0 | |
70 bic $d,$b,$t3 | |
71 sll $b,30,$b | |
72 | |
73 extll @X[$i],4,@X[$i+1] # extract upper half | |
74 or $t2,$t3,$t2 | |
75 addl @X[$i],$e,$e | |
76 | |
77 addl $t1,$e,$e | |
78 srl $b,32,$t3 | |
79 zapnot @X[$i],0xf,@X[$i] | |
80 | |
81 addl $t0,$e,$e | |
82 addl $t2,$e,$e | |
83 or $t3,$b,$b | |
84 ___ | |
# Odd rounds 1..13: X[$i] was already produced by the preceding even
# round, so only the round arithmetic is emitted.
85 $code.=<<___ if (($i&1) && $i<15); | |
86 sll $a,5,$t1 | |
87 addl $K,$e,$e | |
88 and $b,$c,$t2 | |
89 zapnot $a,0xf,$a | |
90 | |
91 srl $a,27,$t0 | |
92 addl @X[$i%16],$e,$e | |
93 bic $d,$b,$t3 | |
94 sll $b,30,$b | |
95 | |
96 or $t2,$t3,$t2 | |
97 addl $t1,$e,$e | |
98 srl $b,32,$t3 | |
99 zapnot @X[$i],0xf,@X[$i] | |
100 | |
101 addl $t0,$e,$e | |
102 addl $t2,$e,$e | |
103 or $t3,$b,$b | |
104 ___ | |
# Rounds 15..19: same round arithmetic, interleaved with the computation of
# the next schedule word. X[$j%16] is xor-ed with X[($j+2)%16],
# X[($j+8)%16] and X[($j+13)%16], then rotated left by one bit using
# srl 31 / addl x,x,x / or. The final "zapnot X[$i%16],0xf" clears the
# upper 32 bits of the word just consumed.
# NOTE(review): presumably that cleanup is what keeps the later
# "srl ...,31" steps from picking up stale upper bits - confirm.
105 $code.=<<___ if ($i>=15); # with forward Xupdate | |
106 sll $a,5,$t1 | |
107 addl $K,$e,$e | |
108 and $b,$c,$t2 | |
109 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] | |
110 | |
111 zapnot $a,0xf,$a | |
112 addl @X[$i%16],$e,$e | |
113 bic $d,$b,$t3 | |
114 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] | |
115 | |
116 srl $a,27,$t0 | |
117 addl $t1,$e,$e | |
118 or $t2,$t3,$t2 | |
119 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] | |
120 | |
121 sll $b,30,$b | |
122 addl $t0,$e,$e | |
123 srl @X[$j%16],31,$t1 | |
124 | |
125 addl $t2,$e,$e | |
126 srl $b,32,$t3 | |
127 addl @X[$j%16],@X[$j%16],@X[$j%16] | |
128 | |
129 or $t3,$b,$b | |
130 zapnot @X[$i%16],0xf,@X[$i%16] | |
131 or $t1,@X[$j%16],@X[$j%16] | |
132 ___ | |
133 } | |
134 | |
# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends to the global $code the assembly for one round that uses the
# parity function b XOR c XOR d. The driver calls it for rounds 20..39 and
# again for rounds 60..79.
#   $i        round index.
#   $a..$e    register names holding the five working variables for this
#             round.
# Nothing useful is returned; the only effect is the text appended to $code.
135 sub BODY_20_39 { | |
136 my ($i,$a,$b,$c,$d,$e)=@_; | |
# $j indexes the X[] entry computed one round ahead by the Xupdate below.
137 my $j=$i+1; | |
# Every round except 79: round arithmetic interleaved with the computation
# of X[$j%16] (xor with X[($j+2)%16], X[($j+8)%16], X[($j+13)%16], then a
# rotate left by one via srl 31 / addl x,x,x / or). $b is rotated here with
# sll 30 / srl 2 / or.
138 $code.=<<___ if ($i<79); # with forward Xupdate | |
139 sll $a,5,$t1 | |
140 addl $K,$e,$e | |
141 zapnot $a,0xf,$a | |
142 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] | |
143 | |
144 sll $b,30,$t3 | |
145 addl $t1,$e,$e | |
146 xor $b,$c,$t2 | |
147 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] | |
148 | |
149 srl $b,2,$b | |
150 addl @X[$i%16],$e,$e | |
151 xor $d,$t2,$t2 | |
152 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] | |
153 | |
154 srl @X[$j%16],31,$t1 | |
155 addl $t2,$e,$e | |
156 srl $a,27,$t0 | |
157 addl @X[$j%16],@X[$j%16],@X[$j%16] | |
158 | |
159 or $t3,$b,$b | |
160 addl $t0,$e,$e | |
161 or $t1,@X[$j%16],@X[$j%16] | |
162 ___ | |
# Clear the upper 32 bits of the word just consumed. This is skipped from
# round 77 on: the last Xupdate emitted computes X[79], whose newest input
# is X[76], so X[77] and later are never read by another Xupdate.
163 $code.=<<___ if ($i<77); | |
164 zapnot @X[$i%16],0xf,@X[$i%16] | |
165 ___ | |
# Round 79: no Xupdate is needed any more. The freed instruction slots load
# the five state words from $ctx into X[0]..X[4]; the driver's epilogue
# template adds them to $A..$E.
166 $code.=<<___ if ($i==79); # with context fetch | |
167 sll $a,5,$t1 | |
168 addl $K,$e,$e | |
169 zapnot $a,0xf,$a | |
170 ldl @X[0],0($ctx) | |
171 | |
172 sll $b,30,$t3 | |
173 addl $t1,$e,$e | |
174 xor $b,$c,$t2 | |
175 ldl @X[1],4($ctx) | |
176 | |
177 srl $b,2,$b | |
178 addl @X[$i%16],$e,$e | |
179 xor $d,$t2,$t2 | |
180 ldl @X[2],8($ctx) | |
181 | |
182 srl $a,27,$t0 | |
183 addl $t2,$e,$e | |
184 ldl @X[3],12($ctx) | |
185 | |
186 or $t3,$b,$b | |
187 addl $t0,$e,$e | |
188 ldl @X[4],16($ctx) | |
189 ___ | |
190 } | |
191 | |
# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends to the global $code the assembly for one round that uses the
# majority function (b AND c) OR (b AND d) OR (c AND d). The driver calls it
# for rounds 40..59.
#   $i        round index.
#   $a..$e    register names holding the five working variables for this
#             round.
# Nothing useful is returned; the only effect is the text appended to $code.
192 sub BODY_40_59 { | |
193 my ($i,$a,$b,$c,$d,$e)=@_; | |
# $j indexes the X[] entry computed one round ahead by the Xupdate below.
194 my $j=$i+1; | |
# A single unconditional template: round arithmetic interleaved with the
# computation of X[$j%16] (xor with X[($j+2)%16], X[($j+8)%16],
# X[($j+13)%16], then a rotate left by one via srl 31 / addl x,x,x / or).
# $b is rotated with sll 30 / srl 32 / or, and the trailing zapnot clears
# the upper 32 bits of the word just consumed.
195 $code.=<<___; # with forward Xupdate | |
196 sll $a,5,$t1 | |
197 addl $K,$e,$e | |
198 zapnot $a,0xf,$a | |
199 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] | |
200 | |
201 srl $a,27,$t0 | |
202 and $b,$c,$t2 | |
203 and $b,$d,$t3 | |
204 xor @X[($j+8)%16],@X[$j%16],@X[$j%16] | |
205 | |
206 sll $b,30,$b | |
207 addl $t1,$e,$e | |
208 xor @X[($j+13)%16],@X[$j%16],@X[$j%16] | |
209 | |
210 srl @X[$j%16],31,$t1 | |
211 addl $t0,$e,$e | |
212 or $t2,$t3,$t2 | |
213 and $c,$d,$t3 | |
214 | |
215 or $t2,$t3,$t2 | |
216 srl $b,32,$t3 | |
217 addl @X[$i%16],$e,$e | |
218 addl @X[$j%16],@X[$j%16],@X[$j%16] | |
219 | |
220 or $t3,$b,$b | |
221 addl $t2,$e,$e | |
222 or $t1,@X[$j%16],@X[$j%16] | |
223 zapnot @X[$i%16],0xf,@X[$i%16] | |
224 ___ | |
225 } | |
226 | |
# Build the complete text of sha1_block_data_order in $code.
#
# The first template is the function prologue: it reserves a 64-byte stack
# frame, saves ra, s0-s5 and fp, loads the five state words from $ctx into
# $A..$E, and converts $num into an end pointer ($inp + ($num << 6)). It
# then opens the per-block loop at .Lloop and loads the first round
# constant: (23170 << 16) + 31129 = 0x5a827999.
227 $code=<<___; | |
228 #ifdef __linux__ | |
229 #include <asm/regdef.h> | |
230 #else | |
231 #include <asm.h> | |
232 #include <regdef.h> | |
233 #endif | |
234 | |
235 .text | |
236 | |
237 .set noat | |
238 .set noreorder | |
239 .globl sha1_block_data_order | |
240 .align 5 | |
241 .ent sha1_block_data_order | |
242 sha1_block_data_order: | |
243 lda sp,-64(sp) | |
244 stq ra,0(sp) | |
245 stq s0,8(sp) | |
246 stq s1,16(sp) | |
247 stq s2,24(sp) | |
248 stq s3,32(sp) | |
249 stq s4,40(sp) | |
250 stq s5,48(sp) | |
251 stq fp,56(sp) | |
252 .mask 0x0400fe00,-64 | |
253 .frame sp,64,ra | |
254 .prologue 0 | |
255 | |
256 ldl $A,0($ctx) | |
257 ldl $B,4($ctx) | |
258 sll $num,6,$num | |
259 ldl $C,8($ctx) | |
260 ldl $D,12($ctx) | |
261 ldl $E,16($ctx) | |
262 addq $inp,$num,$num | |
263 | |
264 .Lloop: | |
265 .set noreorder | |
266 ldah $K,23170(zero) | |
267 zapnot $B,0xf,$B | |
268 lda $K,31129($K) # K_00_19 | |
269 ___ | |
# Rounds 0..19. After each round the list of register names is rotated
# right by one (the name that was $e moves to the front), so the generated
# code never has to move the working variables between registers. The
# global $i is deliberately shared by all four loops; the later loops have
# no initializer and continue from where the previous one stopped.
270 for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } | |
271 | |
# Round constant for rounds 20..39: (28378 << 16) - 5215 = 0x6ed9eba1.
272 $code.=<<___; | |
273 ldah $K,28378(zero) | |
274 lda $K,-5215($K) # K_20_39 | |
275 ___ | |
276 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | |
277 | |
# Round constant for rounds 40..59: the low 32 bits of
# (-28900 << 16) - 17188 are 0x8f1bbcdc.
278 $code.=<<___; | |
279 ldah $K,-28900(zero) | |
280 lda $K,-17188($K) # K_40_59 | |
281 ___ | |
282 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } | |
283 | |
# Round constant for rounds 60..79: the low 32 bits of
# (-13725 << 16) - 15914 are 0xca62c1d6. These rounds reuse BODY_20_39.
284 $code.=<<___; | |
285 ldah $K,-13725(zero) | |
286 lda $K,-15914($K) # K_60_79 | |
287 ___ | |
288 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } | |
289 | |
# Loop tail and function epilogue. X[0]..X[4] hold the previous state words
# (loaded by BODY_20_39 at round 79); they are added to $A..$E and the
# result is stored back to $ctx. $inp advances by 64 and the loop repeats
# while $inp is below the end pointer in $num. The saved registers are then
# restored, the frame is released and the function returns.
290 $code.=<<___; | |
291 addl @X[0],$A,$A | |
292 addl @X[1],$B,$B | |
293 addl @X[2],$C,$C | |
294 addl @X[3],$D,$D | |
295 addl @X[4],$E,$E | |
296 stl $A,0($ctx) | |
297 stl $B,4($ctx) | |
298 addq $inp,64,$inp | |
299 stl $C,8($ctx) | |
300 stl $D,12($ctx) | |
301 stl $E,16($ctx) | |
302 cmpult $inp,$num,$t1 | |
303 bne $t1,.Lloop | |
304 | |
305 .set noreorder | |
306 ldq ra,0(sp) | |
307 ldq s0,8(sp) | |
308 ldq s1,16(sp) | |
309 ldq s2,24(sp) | |
310 ldq s3,32(sp) | |
311 ldq s4,40(sp) | |
312 ldq s5,48(sp) | |
313 ldq fp,56(sp) | |
314 lda sp,64(sp) | |
315 ret (ra) | |
316 .end sha1_block_data_order | |
317 .ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>" | |
318 .align 2 | |
319 ___ | |
# Emit the generated assembly.
#
# The first command-line argument, when present and true, names the output
# file and STDOUT is redirected to it; otherwise the text goes to the
# STDOUT the script was started with.
#
# The three-argument open treats the name strictly as a file name (no mode
# characters are interpreted from it), and both the open and the final close
# are checked. Buffered write errors only surface at close, so without the
# check a full disk or an unwritable path would silently produce a missing
# or truncated assembly file.
$output=shift;
if ($output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}
print $code;
close STDOUT or die "error closing STDOUT: $!";
OLD | NEW |