OLD | NEW |
| (Empty) |
1 #!/usr/bin/env perl | |
2 | |
3 # ==================================================================== | |
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
5 # project. The module is, however, dual licensed under OpenSSL and | |
6 # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 # details see http://www.openssl.org/~appro/cryptogams/. | |
8 # ==================================================================== | |
9 | |
10 # sha1_block for Thumb. | |
11 # | |
12 # January 2007. | |
13 # | |
14 # The code does not present direct interest to OpenSSL, because of low | |
15 # performance. Its purpose is to establish _size_ benchmark. Pretty | |
16 # useless one I must say, because 30% or 88 bytes larger ARMv4 code | |
17 # [available on demand] is almost _twice_ as fast. It should also be | |
18 # noted that in-lining of .Lcommon and .Lrotate improves performance | |
19 # by over 40%, while code increases by only 10% or 32 bytes. But once | |
20 # again, the goal was to establish _size_ benchmark, not performance. | |
21 | |
# The first command-line argument names the file that receives the
# generated assembly.  With no argument, an empty one, or "-", STDOUT is
# left as inherited, so the script can still be used as a filter.
# The three-argument form of open keeps characters in the file name from
# being interpreted as part of the open mode, and a failure to create the
# file is now fatal instead of being silently ignored.
$output=shift;
if (defined($output) && $output ne "" && $output ne "-") {
    open STDOUT,">",$output or die "can't open $output: $!";
}
24 | |
# Code-generation switches.  A true $inline expands the shared round
# fragments at every use instead of emitting "bl" calls to .Lcommon and
# .Lrotate.  Enabling $cheat_on_binutils makes the entry point begin with
# an ARM-mode stub that switches to Thumb.
$inline=0;
#$cheat_on_binutils=1;

# Register allocation used throughout the generated Thumb code.
($t0,$t1,$t2)=("r0","r1","r2");			# temporaries
($a,$b,$c,$d,$e)=("r3","r4","r5","r6","r7");	# working variables A..E
# "upper" registers can be used in add/sub and mov insns
($K,$ctx,$inp,$len,$Xi)=("r8","r9","r10","r11","r12");
41 | |
# Return the instruction sequence shared by the start of every round:
# step $t0 down by one word and load that word into $t1, add the round
# constant $K to E, then accumulate E + ROR(A,27) + X[i] in $t2.
# $e is overwritten as scratch while the rotation is formed.
sub common {
    my @insns=(
	"sub $t0,#4",
	"ldr $t1,[$t0]",
	"add $e,$K @ E+=K_xx_xx",
	"lsl $t2,$a,#5",
	"add $t2,$e",
	"lsr $e,$a,#27",
	"add $t2,$e @ E+=ROR(A,27)",
	"add $t2,$t1 @ E+=X[i]",
    );
    return join("\n",@insns,"");
}
# Return the instruction sequence shared by the end of every round:
# shift the working variables down (E=D, D=C, C=ROR(B,2), B=A) and set
# A to $t2 + $t1, i.e. the accumulated sum plus the round function value
# that the caller left in $t1.
sub rotate {
    my @insns=(
	"mov $e,$d @ E=D",
	"mov $d,$c @ D=C",
	"lsl $c,$b,#30",
	"lsr $b,$b,#2",
	"orr $c,$b @ C=ROR(B,2)",
	"mov $b,$a @ B=A",
	"add $a,$t2,$t1 @ A=E+F_xx_xx(B,C,D)",
    );
    return join("\n",@insns,"");
}
65 | |
# Append one round of the 00..19 group to $code: the shared prologue,
# the round function F_00_19(B,C,D) = ((C ^ D) & B) ^ D computed into
# $t1, then the shared epilogue.  The shared parts are expanded in place
# when $inline is true and reached through "bl" otherwise.
sub BODY_00_19 {
    my ($head,$tail)=$inline
	? (common(),rotate())
	: ("\tbl .Lcommon\n","\tbl .Lrotate\n");
    my $f=join("\n",
	"mov $t1,$c",
	"eor $t1,$d",
	"and $t1,$b",
	"eor $t1,$d @ F_00_19(B,C,D)",
	"");
    return $code.=$head.$f.$tail;
}
76 | |
# Append one round of the 20..39 group to $code: the shared prologue,
# the round function F_20_39(B,C,D) = B ^ C ^ D computed into $t1, then
# the shared epilogue.  The shared parts are expanded in place when
# $inline is true and reached through "bl" otherwise.
sub BODY_20_39 {
    my ($head,$tail)=$inline
	? (common(),rotate())
	: ("\tbl .Lcommon\n","\tbl .Lrotate\n");
    my $f=join("\n",
	"mov $t1,$b",
	"eor $t1,$c",
	"eor $t1,$d @ F_20_39(B,C,D)",
	"");
    return $code.=$head.$f.$tail;
}
86 | |
# Append one round of the 40..59 group to $code: the shared prologue,
# the round function F_40_59(B,C,D) = (B & C) | ((B | C) & D) computed
# into $t1 with $e as scratch, then the shared epilogue.  The shared
# parts are expanded in place when $inline is true and reached through
# "bl" otherwise.
sub BODY_40_59 {
    my ($head,$tail)=$inline
	? (common(),rotate())
	: ("\tbl .Lcommon\n","\tbl .Lrotate\n");
    my $f=join("\n",
	"mov $t1,$b",
	"and $t1,$c",
	"mov $e,$b",
	"orr $e,$c",
	"and $e,$d",
	"orr $t1,$e @ F_40_59(B,C,D)",
	"");
    return $code.=$head.$f.$tail;
}
99 | |
# Assembler preamble: select the text section and 16-bit (Thumb)
# encoding, then declare and align the sha1_block_data_order entry point.
$code=join("\n",
    '.text',
    '.code 16',
    '',
    '.global sha1_block_data_order',
    '.type sha1_block_data_order,%function',
    '',
    '.align 2',
    'sha1_block_data_order:',
    '');

# Optional entry stub, emitted only when $cheat_on_binutils is set: two
# ARM-mode instructions that branch to the following address with bit 0
# set, which switches the core to the Thumb ISA.
$code.=join("\n",
    '.code 32',
    'add r3,pc,#1',
    'bx r3 @ switch to Thumb ISA',
    '.code 16',
    '') if ($cheat_on_binutils);
# Function prologue: save r4-r7, then r8-r12 and lr (moved through low
# registers first), and stash the three arguments in high registers.
# r2 is multiplied by 64 and added to the input pointer, so $len ends up
# holding the address where the input ends.
$code.=<<___;
push {r4-r7}
mov r3,r8
mov r4,r9
mov r5,r10
mov r6,r11
mov r7,r12
push {r3-r7,lr}
lsl r2,#6
mov $ctx,r0 @ save context
mov $inp,r1 @ save inp
mov $len,r2 @ save len
add $len,$inp @ $len to point at inp end

___

# Per-block loop, phase 1: build X[0..15] on the stack.  Each word is
# assembled from four input bytes, most significant byte first, and
# pushed; the loop ends once sp has dropped by 16 words.
$code.=<<___;
.Lloop:
mov $Xi,sp
mov $t2,sp
sub $t2,#16*4 @ [3]
.LXload:
ldrb $a,[$t1,#0] @ $t1 is r1 and holds inp
ldrb $b,[$t1,#1]
ldrb $c,[$t1,#2]
ldrb $d,[$t1,#3]
lsl $a,#24
lsl $b,#16
lsl $c,#8
orr $a,$b
orr $a,$c
orr $a,$d
add $t1,#4
push {$a}
cmp sp,$t2
bne .LXload @ [+14*16]

___

# Phase 2: extend the schedule by 64 more words.  Each new word is the
# XOR of the words 16, 14, 8 and 3 positions back, rotated right by 31
# (i.e. left by 1), and is pushed below the previous ones.
$code.=<<___;
mov $inp,$t1 @ update $inp
sub $t2,#32*4
sub $t2,#32*4
mov $e,#31 @ [+4]
.LXupdate:
ldr $a,[sp,#15*4]
ldr $b,[sp,#13*4]
ldr $c,[sp,#7*4]
ldr $d,[sp,#2*4]
eor $a,$b
eor $a,$c
eor $a,$d
ror $a,$e
push {$a}
cmp sp,$t2
bne .LXupdate @ [+(11+1)*64]

___

# Load A..E from the context and point $t0 at the top of the schedule
# ($Xi held the stack pointer from before the first push).
$code.=<<___;
ldmia $t0!,{$a,$b,$c,$d,$e} @ $t0 is r0 and holds ctx
mov $t0,$Xi

___

# Rounds 00..19: $K gets the group constant and $Xi the value $t0 will
# reach after 20 words, which the loop below compares against.
$code.=<<___;
ldr $t2,.LK_00_19
mov $t1,$t0
sub $t1,#20*4
mov $Xi,$t1
mov $K,$t2 @ [+7+4]
.L_00_19:
___
BODY_00_19();

# Rounds 20..39.  The same loop body is re-entered later for rounds
# 60..79 with a different $K and end marker.
$code.=<<___;
cmp $Xi,$t0
bne .L_00_19 @ [+(2+9+4+2+8+2)*20]

ldr $t2,.LK_20_39
mov $t1,$t0
sub $t1,#20*4
mov $Xi,$t1
mov $K,$t2 @ [+5]
.L_20_39_or_60_79:
___
BODY_20_39();

# When the shared loop finishes with $t0 equal to sp the whole schedule
# has been consumed (rounds 60..79 just ran), so branch to .Ldone;
# otherwise fall through and set up rounds 40..59.
$code.=<<___;
cmp $Xi,$t0
bne .L_20_39_or_60_79 @ [+(2+9+3+2+8+2)*20*2]
cmp sp,$t0
beq .Ldone @ [+2]

ldr $t2,.LK_40_59
mov $t1,$t0
sub $t1,#20*4
mov $Xi,$t1
mov $K,$t2 @ [+5]
.L_40_59:
___
BODY_40_59();

# After rounds 40..59, load the last constant, use sp as the end marker
# and jump back into the shared loop for rounds 60..79.
$code.=<<___;
cmp $Xi,$t0
bne .L_40_59 @ [+(2+9+6+2+8+2)*20]

ldr $t2,.LK_60_79
mov $Xi,sp
mov $K,$t2
b .L_20_39_or_60_79 @ [+4]
___

# .Ldone: add the five context words to A..E and store the sums back.
$code.=<<___;
.Ldone:
mov $t0,$ctx
ldr $t1,[$t0,#0]
ldr $t2,[$t0,#4]
add $a,$t1
ldr $t1,[$t0,#8]
add $b,$t2
ldr $t2,[$t0,#12]
add $c,$t1
ldr $t1,[$t0,#16]
add $d,$t2
add $e,$t1
stmia $t0!,{$a,$b,$c,$d,$e} @ [+20]

___

# Release the 80-word schedule, reload ctx/inp into r0/r1 and process
# the next block unless the input pointer has reached $len.
$code.=<<___;
add sp,#80*4 @ deallocate stack frame
mov $t0,$ctx @ restore ctx
mov $t1,$inp @ restore inp
cmp $t1,$len
beq .Lexit
b .Lloop @ [+6] total 3212 cycles
___

# Epilogue: restore r8-r12 and lr through low registers, then r4-r7,
# and return.
$code.=<<___;
.Lexit:
pop {r2-r7}
mov r8,r2
mov r9,r3
mov r10,r4
mov r11,r5
mov r12,r6
mov lr,r7
pop {r4-r7}
bx lr
.align 2
___
# Out-of-line copies of the shared round fragments, each returning with
# "mov pc,lr".  They are needed only when the rounds reach them through
# "bl", i.e. when $inline is false.
unless ($inline) {
    $code.=join("",
	".Lcommon:\n",common(),"\tmov pc,lr\n",
	".Lrotate:\n",rotate(),"\tmov pc,lr\n");
}

# Literal pool with the four per-group round constants, followed by the
# symbol size directive and an identification string.
$code.=".align 2\n";
foreach my $k (["00_19","0x5a827999"],
	       ["20_39","0x6ed9eba1"],
	       ["40_59","0x8f1bbcdc"],
	       ["60_79","0xca62c1d6"]) {
    $code.=sprintf(".LK_%s: .word %s\n",@$k);
}
$code.=".size sha1_block_data_order,.-sha1_block_data_order\n";
$code.='.asciz "SHA1 block transform for Thumb, CRYPTOGAMS by <appro@openssl.org>"'."\n";
257 | |
# Write the generated assembly and close the output explicitly.
# Buffered write errors (for example a full disk) may only surface when
# the handle is flushed at close, so both calls are checked: a truncated
# output file must not be reported as success.
print $code or die "can't write output: $!";
close STDOUT or die "error closing STDOUT: $!";	# enforce flush
OLD | NEW |