OLD | NEW |
| (Empty) |
! ABI selection. ABI64 is defined when the compiler predefines report a | |
! 64-bit SPARC target. FRAME is the stack adjustment handed to every save | |
! instruction in this file. BIAS is the offset added to %fp or %sp wherever | |
! this file forms a stack address (2047 under ABI64, 0 otherwise). | |
1 #if defined(__SUNPRO_C) && defined(__sparcv9) | |
2 # define ABI64 /* They've said -xarch=v9 at command line */ | |
3 #elif defined(__GNUC__) && defined(__arch64__) | |
4 # define ABI64 /* They've said -m64 at command line */ | |
5 #endif | |
6 | |
7 #ifdef ABI64 | |
! %g2 and %g3 are declared as scratch because the wipe routine clears them. | |
8 .register %g2,#scratch | |
9 .register %g3,#scratch | |
10 # define FRAME -192 | |
11 # define BIAS 2047 | |
12 #else | |
13 # define FRAME -96 | |
14 # define BIAS 0 | |
15 #endif | |
16 | |
17 .text | |
18 .align 32 | |
19 .global OPENSSL_wipe_cpu | |
20 .type OPENSSL_wipe_cpu,#function | |
! OPENSSL_wipe_cpu: zero the integer and floating-point registers. | |
! Takes no arguments. Returns %fp+BIAS of its own frame in the return | |
! register, which the original note below describes as the top of stack | |
! of the caller. | |
21 ! Keep in mind that this does not excuse us from wiping the stack! | |
22 ! This routine wipes registers, but not the backing store [which | |
23 ! resides on the stack, toward lower addresses]. To facilitate | |
24 ! stack wiping I return a pointer to the top of stack of the *caller*. | |
25 OPENSSL_wipe_cpu: | |
26 save %sp,FRAME,%sp | |
27 nop | |
! Clean the other register windows: __sun builds issue the | |
! ST_CLEAN_WINDOWS software trap, all other builds call the | |
! .walk.reg.wins helper defined further down. | |
28 #ifdef __sun | |
29 #include <sys/trap.h> | |
30 ta ST_CLEAN_WINDOWS | |
31 #else | |
32 call .walk.reg.wins | |
33 #endif | |
34 nop | |
! Position-independent address of .zero: the delay slot passes the offset | |
! of .zero from the call instruction and .PIC.zero.up adds %o7 to it. | |
35 call .PIC.zero.up | |
36 mov .zero-(.-4),%o0 | |
! Both %f0 and %f1 are loaded from the first zero word at .zero. | |
37 ld [%o0],%f0 | |
38 ld [%o0],%f1 | |
39 | |
40 subcc %g0,1,%o0 | |
41 ! Following is V9 "rd %ccr,%o0" instruction. However! V8 | |
42 ! specification says that it ("rd %asr2,%o0" in V8 terms) does | |
43 ! not cause illegal_instruction trap. It therefore can be used | |
44 ! to determine if the CPU the code is executing on is V8- or | |
45 ! V9-compliant, as V9 returns a distinct value of 0x99, | |
46 ! "negative" and "borrow" bits set in both %icc and %xcc. | |
47 .word 0x91408000 !rd %ccr,%o0 | |
48 cmp %o0,0x99 | |
49 bne .v8 | |
50 nop | |
51 ! Even though we do not use %fp register bank, | |
52 ! we wipe it as memcpy might have used it... | |
! V9 only: copy the zero in %f0 into %f62 down to %f32. The instructions | |
! are emitted as raw words, one fmovd per even-numbered register. | |
53 .word 0xbfa00040 !fmovd %f0,%f62 | |
54 .word 0xbba00040 !... | |
55 .word 0xb7a00040 | |
56 .word 0xb3a00040 | |
57 .word 0xafa00040 | |
58 .word 0xaba00040 | |
59 .word 0xa7a00040 | |
60 .word 0xa3a00040 | |
61 .word 0x9fa00040 | |
62 .word 0x9ba00040 | |
63 .word 0x97a00040 | |
64 .word 0x93a00040 | |
65 .word 0x8fa00040 | |
66 .word 0x8ba00040 | |
67 .word 0x87a00040 | |
68 .word 0x83a00040 !fmovd %f0,%f32 | |
! Common path: clear %f31 down to %f2 from the zeroed %f0/%f1, interleaved | |
! with clearing %o0-%o5, %o7, %l0-%l7, %i0-%i5 and %g1-%g5. | |
69 .v8: fmovs %f1,%f31 | |
70 clr %o0 | |
71 fmovs %f0,%f30 | |
72 clr %o1 | |
73 fmovs %f1,%f29 | |
74 clr %o2 | |
75 fmovs %f0,%f28 | |
76 clr %o3 | |
77 fmovs %f1,%f27 | |
78 clr %o4 | |
79 fmovs %f0,%f26 | |
80 clr %o5 | |
81 fmovs %f1,%f25 | |
82 clr %o7 | |
83 fmovs %f0,%f24 | |
84 clr %l0 | |
85 fmovs %f1,%f23 | |
86 clr %l1 | |
87 fmovs %f0,%f22 | |
88 clr %l2 | |
89 fmovs %f1,%f21 | |
90 clr %l3 | |
91 fmovs %f0,%f20 | |
92 clr %l4 | |
93 fmovs %f1,%f19 | |
94 clr %l5 | |
95 fmovs %f0,%f18 | |
96 clr %l6 | |
97 fmovs %f1,%f17 | |
98 clr %l7 | |
99 fmovs %f0,%f16 | |
100 clr %i0 | |
101 fmovs %f1,%f15 | |
102 clr %i1 | |
103 fmovs %f0,%f14 | |
104 clr %i2 | |
105 fmovs %f1,%f13 | |
106 clr %i3 | |
107 fmovs %f0,%f12 | |
108 clr %i4 | |
109 fmovs %f1,%f11 | |
110 clr %i5 | |
111 fmovs %f0,%f10 | |
112 clr %g1 | |
113 fmovs %f1,%f9 | |
114 clr %g2 | |
115 fmovs %f0,%f8 | |
116 clr %g3 | |
117 fmovs %f1,%f7 | |
118 clr %g4 | |
119 fmovs %f0,%f6 | |
120 clr %g5 | |
121 fmovs %f1,%f5 | |
122 fmovs %f0,%f4 | |
123 fmovs %f1,%f3 | |
124 fmovs %f0,%f2 | |
125 | |
126 add %fp,BIAS,%i0 ! return pointer to top of stack of the caller | |
127 | |
128 ret | |
129 restore | |
130 | |
! Two zero words used as the source for clearing the FP registers, followed | |
! by the helper that turns a call-relative offset in %o0 into an address. | |
131 .zero: .long 0x0,0x0 | |
132 .PIC.zero.up: | |
133 retl | |
134 add %o0,%o7,%o0 | |
135 #ifdef DEBUG | |
136 .global walk_reg_wins | |
137 .type walk_reg_wins,#function | |
138 walk_reg_wins: | |
139 #endif | |
! .walk.reg.wins: recursive helper. Each invocation opens a new window with | |
! save. It returns at once when %i7 equals %o7; otherwise it recurses | |
! unless %o7 is zero, and then clears %o2-%o5, %o7 and %l0-%l7 of its own | |
! window. %o0 and %o1 are cleared in the branch delay slots. | |
140 .walk.reg.wins: | |
141 save %sp,FRAME,%sp | |
142 cmp %i7,%o7 | |
143 be 2f | |
144 clr %o0 | |
145 cmp %o7,0 ! compiler never cleans %o7... | |
146 be 1f ! could have been a leaf function... | |
147 clr %o1 | |
148 call .walk.reg.wins | |
149 nop | |
150 1: clr %o2 | |
151 clr %o3 | |
152 clr %o4 | |
153 clr %o5 | |
154 clr %o7 | |
155 clr %l0 | |
156 clr %l1 | |
157 clr %l2 | |
158 clr %l3 | |
159 clr %l4 | |
160 clr %l5 | |
161 clr %l6 | |
162 clr %l7 | |
163 add %o0,1,%i0 ! used for debugging | |
164 2: ret | |
165 restore | |
166 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | |
167 | |
168 .global OPENSSL_atomic_add | |
169 .type OPENSSL_atomic_add,#function | |
170 .align 32 | |
! OPENSSL_atomic_add: atomically add %o1 to the 32-bit word at [%o0] and | |
! return the new value, sign-extended, in %o0. | |
! | |
! V9 CPUs use a cas retry loop. Builds without ABI64 probe the CPU at run | |
! time and fall back, on V8, to a swap-based spin lock in which the word | |
! itself temporarily holds -1 as the lock token. On that path the counter | |
! must therefore never legitimately hold -1. | |
171 OPENSSL_atomic_add: | |
172 #ifndef ABI64 | |
173 subcc %g0,1,%o2 | |
174 .word 0x95408000 !rd %ccr,%o2, see comment above | |
175 cmp %o2,0x99 | |
176 be .v9 | |
177 nop | |
! V8 path: take a register window so that the pointer and the amount | |
! survive the calls to YIELD_CPU in %i0 and %i1. | |
178 save %sp,FRAME,%sp | |
179 ba .enter | |
180 nop | |
181 #ifdef __sun | |
182 ! Note that you do not have to link with libthread to call thr_yield, | |
183 ! as libc provides a stub, which is overloaded the moment you link | |
184 ! with *either* libpthread or libthread... | |
185 #define YIELD_CPU thr_yield | |
186 #else | |
187 ! applies at least to Linux and FreeBSD... Feedback expected... | |
188 #define YIELD_CPU sched_yield | |
189 #endif | |
190 .spin: call YIELD_CPU | |
191 nop | |
192 .enter: ld [%i0],%i2 | |
! Test before swap. The lock token written by the swap below is -1, so | |
! that is the value that means another thread holds the word. Comparing | |
! against any other constant never sees a held lock and makes a counter | |
! equal to that constant spin forever. | |
193 cmp %i2,-1 | |
194 be .spin | |
195 mov -1,%i2 | |
! Acquire: %i2 receives the old value and the word becomes -1. | |
196 swap [%i0],%i2 | |
197 cmp %i2,-1 | |
198 be .spin | |
199 add %i2,%i1,%i2 | |
! Release: order earlier stores, then publish the new value, which also | |
! replaces the lock token. | |
200 stbar | |
201 st %i2,[%i0] | |
202 sra %i2,%g0,%i0 | |
203 ret | |
204 restore | |
205 .v9: | |
206 #endif | |
207 ld [%o0],%o2 | |
208 1: add %o1,%o2,%o3 | |
209 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 | |
210 cmp %o2,%o3 | |
211 bne 1b | |
212 mov %o3,%o2 ! cas is always fetching to dest. register | |
213 add %o1,%o2,%o0 ! OpenSSL expects the new value | |
214 retl | |
215 sra %o0,%g0,%o0 ! we return signed int, remember? | |
216 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | |
217 | |
218 .global _sparcv9_rdtick | |
219 .align 32 | |
! _sparcv9_rdtick: when the rd %ccr probe reports a V9 CPU, return the | |
! %tick register in %o0 with its upper 32 bits also placed in %o1. | |
! Otherwise return zero in both %o0 and %o1. | |
220 _sparcv9_rdtick: | |
221 subcc %g0,1,%o0 | |
222 .word 0x91408000 !rd %ccr,%o0 | |
223 cmp %o0,0x99 | |
224 bne .notick | |
! Delay slot: runs on both paths and clears %o0. | |
225 xor %o0,%o0,%o0 | |
226 .word 0x91410000 !rd %tick,%o0 | |
227 retl | |
228 .word 0x93323020 !srlx %o0,32,%o1 | |
229 .notick: | |
230 retl | |
231 xor %o1,%o1,%o1 | |
232 .type _sparcv9_rdtick,#function | |
233 .size _sparcv9_rdtick,.-_sparcv9_rdtick | |
234 | |
235 .global _sparcv9_vis1_probe | |
236 .align 8 | |
! _sparcv9_vis1_probe: execute an ldda through ASI_FP16_P from the address | |
! %sp+BIAS+2 followed by an fxor, both emitted as raw words, then return. | |
! Nothing here reports a result. Presumably the caller detects a missing | |
! VIS1 unit by catching the resulting trap - TODO confirm against caller. | |
237 _sparcv9_vis1_probe: | |
238 add %sp,BIAS+2,%o1 | |
239 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 | |
240 retl | |
241 .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
242 .type _sparcv9_vis1_probe,#function | |
243 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe | |
244 | |
245 ! Probe and instrument VIS1 instruction. Output is number of cycles it | |
246 ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit | |
247 ! is slow (documented to be 6 cycles on T2) and the core is in-order | |
248 ! single-issue, it should be possible to distinguish Tx reliably... | |
249 ! Observed return values are: | |
250 ! | |
251 ! UltraSPARC IIe 7 | |
252 ! UltraSPARC III 7 | |
253 ! UltraSPARC T1 24 | |
254 ! | |
255 ! Numbers for T2 and SPARC64 V-VII are more than welcome. | |
256 ! | |
257 ! It would be possible to detect specifically US-T1 by instrumenting | |
258 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite | |
259 ! a lot of %tick-s, couple of thousand on Linux... | |
! | |
! Takes no arguments. Returns in %o0 the smallest of the four measured | |
! %tick intervals. | |
260 .global _sparcv9_vis1_instrument | |
261 .align 8 | |
262 _sparcv9_vis1_instrument: | |
! Five %tick samples in %o0-%o4 with one pair of fxor between neighbours. | |
263 .word 0x91410000 !rd %tick,%o0 | |
264 .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
265 .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
266 .word 0x93410000 !rd %tick,%o1 | |
267 .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
268 .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
269 .word 0x95410000 !rd %tick,%o2 | |
270 .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
271 .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
272 .word 0x97410000 !rd %tick,%o3 | |
273 .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
274 .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
275 .word 0x99410000 !rd %tick,%o4 | |
276 | |
277 ! calculate intervals | |
278 sub %o1,%o0,%o0 | |
279 sub %o2,%o1,%o1 | |
280 sub %o3,%o2,%o2 | |
281 sub %o4,%o3,%o3 | |
282 | |
283 ! find minimum value | |
! Each step is an annulled branch to .+8: the mov in its delay slot runs | |
! only when the branch is taken, that is when %o0 is the larger value. | |
284 cmp %o0,%o1 | |
285 .word 0x38680002 !bgu,a %xcc,.+8 | |
286 mov %o1,%o0 | |
287 cmp %o0,%o2 | |
288 .word 0x38680002 !bgu,a %xcc,.+8 | |
289 mov %o2,%o0 | |
290 cmp %o0,%o3 | |
291 .word 0x38680002 !bgu,a %xcc,.+8 | |
292 mov %o3,%o0 | |
293 | |
294 retl | |
295 nop | |
296 .type _sparcv9_vis1_instrument,#function | |
297 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument | |
298 | |
299 .global _sparcv9_vis2_probe | |
300 .align 8 | |
! _sparcv9_vis2_probe: execute one bshuffle, emitted as a raw word in the | |
! delay slot of the return. Nothing here reports a result. Presumably the | |
! caller catches the trap raised on CPUs without it - TODO confirm. | |
301 _sparcv9_vis2_probe: | |
302 retl | |
303 .word 0x81b00980 !bshuffle %f0,%f0,%f0 | |
304 .type _sparcv9_vis2_probe,#function | |
305 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe | |
306 | |
307 .global _sparcv9_fmadd_probe | |
308 .align 8 | |
! _sparcv9_fmadd_probe: zero %f0 and %f2 with fxor, then execute one fmaddd | |
! in the delay slot of the return. All three are emitted as raw words. | |
! Nothing here reports a result. Presumably the caller catches the trap | |
! raised on CPUs without fused multiply-add - TODO confirm. | |
309 _sparcv9_fmadd_probe: | |
310 .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
311 .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
312 retl | |
313 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 | |
314 .type _sparcv9_fmadd_probe,#function | |
315 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe | |
316 | |
317 .global OPENSSL_cleanse | |
318 .align 32 | |
! OPENSSL_cleanse: store zero over %o1 bytes starting at address %o0. | |
! Lengths of 14 or less go through the byte loop at .Little, and a zero | |
! length returns at once. Longer buffers are zeroed bytewise up to an | |
! aligned address, then with 8-byte stores on V9 or 4-byte stores on V8, | |
! and any remaining tail is finished by .Little. | |
319 OPENSSL_cleanse: | |
320 cmp %o1,14 | |
321 nop | |
322 #ifdef ABI64 | |
323 bgu %xcc,.Lot | |
324 #else | |
325 bgu .Lot | |
326 #endif | |
! Delay slot of the branch above, also the zero-length test for the | |
! fall-through path. | |
327 cmp %o1,0 | |
328 bne .Little | |
329 nop | |
330 retl | |
331 nop | |
332 | |
! Byte loop: one stb per iteration until the count reaches zero. | |
333 .Little: | |
334 stb %g0,[%o0] | |
335 subcc %o1,1,%o1 | |
336 bnz .Little | |
337 add %o0,1,%o0 | |
338 retl | |
339 nop | |
340 .align 32 | |
341 .Lot: | |
342 #ifndef ABI64 | |
343 subcc %g0,1,%g1 | |
344 ! see above for explanation | |
345 .word 0x83408000 !rd %ccr,%g1 | |
346 cmp %g1,0x99 | |
347 bne .v8lot | |
348 nop | |
349 #endif | |
350 | |
! V9 path: store single bytes until %o0 is a multiple of 8. | |
351 .v9lot: andcc %o0,7,%g0 | |
352 bz .v9aligned | |
353 nop | |
354 stb %g0,[%o0] | |
355 sub %o1,1,%o1 | |
356 ba .v9lot | |
357 add %o0,1,%o0 | |
! The fill value 0x01000000 pads the alignment gap with nop instructions. | |
358 .align 16,0x01000000 | |
359 .v9aligned: | |
360 .word 0xc0720000 !stx %g0,[%o0] | |
361 sub %o1,8,%o1 | |
362 andcc %o1,-8,%g0 | |
! Hand-encoded branch with a fixed displacement of -3 words back to | |
! .v9aligned: do not insert instructions between that label and here. | |
363 #ifdef ABI64 | |
364 .word 0x126ffffd !bnz %xcc,.v9aligned | |
365 #else | |
366 .word 0x124ffffd !bnz %icc,.v9aligned | |
367 #endif | |
368 add %o0,8,%o0 | |
369 | |
370 cmp %o1,0 | |
371 bne .Little | |
372 nop | |
373 retl | |
374 nop | |
375 #ifndef ABI64 | |
! V8 path: same scheme with 4-byte alignment and 4-byte stores. | |
376 .v8lot: andcc %o0,3,%g0 | |
377 bz .v8aligned | |
378 nop | |
379 stb %g0,[%o0] | |
380 sub %o1,1,%o1 | |
381 ba .v8lot | |
382 add %o0,1,%o0 | |
383 nop | |
384 .v8aligned: | |
385 st %g0,[%o0] | |
386 sub %o1,4,%o1 | |
387 andcc %o1,-4,%g0 | |
388 bnz .v8aligned | |
389 add %o0,4,%o0 | |
390 | |
391 cmp %o1,0 | |
392 bne .Little | |
393 nop | |
394 retl | |
395 nop | |
396 #endif | |
397 .type OPENSSL_cleanse,#function | |
398 .size OPENSSL_cleanse,.-OPENSSL_cleanse | |
399 | |
! Emit a call to OPENSSL_cpuid_setup into the .init section. Presumably | |
! this makes the setup routine run as part of object initialisation, | |
! before any other code in this file is used - TODO confirm for each | |
! platform. OPENSSL_cpuid_setup is not defined in this file. | |
400 .section ".init",#alloc,#execinstr | |
401 call OPENSSL_cpuid_setup | |
402 nop | |
OLD | NEW |