OLD | NEW |
1 #if defined(__SUNPRO_C) && defined(__sparcv9) | 1 #if defined(__SUNPRO_C) && defined(__sparcv9) |
2 # define ABI64 /* They've said -xarch=v9 at command line */ | 2 # define ABI64 /* They've said -xarch=v9 at command line */ |
3 #elif defined(__GNUC__) && defined(__arch64__) | 3 #elif defined(__GNUC__) && defined(__arch64__) |
4 # define ABI64 /* They've said -m64 at command line */ | 4 # define ABI64 /* They've said -m64 at command line */ |
5 #endif | 5 #endif |
6 | 6 |
7 #ifdef ABI64 | 7 #ifdef ABI64 |
8 .register %g2,#scratch | 8 .register %g2,#scratch |
9 .register %g3,#scratch | 9 .register %g3,#scratch |
10 # define FRAME -192 | 10 # define FRAME -192 |
(...skipping 16 matching lines...) Expand all Loading... |
27 nop | 27 nop |
28 #ifdef __sun | 28 #ifdef __sun |
29 #include <sys/trap.h> | 29 #include <sys/trap.h> |
30 ta ST_CLEAN_WINDOWS | 30 ta ST_CLEAN_WINDOWS |
31 #else | 31 #else |
32 call .walk.reg.wins | 32 call .walk.reg.wins |
33 #endif | 33 #endif |
34 nop | 34 nop |
35 call .PIC.zero.up | 35 call .PIC.zero.up |
36 mov .zero-(.-4),%o0 | 36 mov .zero-(.-4),%o0 |
37 » ldd» [%o0],%f0 | 37 » ld» [%o0],%f0 |
| 38 » ld» [%o0],%f1 |
38 | 39 |
39 subcc %g0,1,%o0 | 40 subcc %g0,1,%o0 |
40 ! Following is V9 "rd %ccr,%o0" instruction. However! V8 | 41 ! Following is V9 "rd %ccr,%o0" instruction. However! V8 |
41 ! specification says that it ("rd %asr2,%o0" in V8 terms) does | 42 ! specification says that it ("rd %asr2,%o0" in V8 terms) does |
42 ! not cause illegal_instruction trap. It therefore can be used | 43 ! not cause illegal_instruction trap. It therefore can be used |
43 ! to determine if the CPU the code is executing on is V8- or | 44 ! to determine if the CPU the code is executing on is V8- or |
44 ! V9-compliant, as V9 returns a distinct value of 0x99, | 45 ! V9-compliant, as V9 returns a distinct value of 0x99, |
45 ! "negative" and "borrow" bits set in both %icc and %xcc. | 46 ! "negative" and "borrow" bits set in both %icc and %xcc. |
46 .word 0x91408000 !rd %ccr,%o0 | 47 .word 0x91408000 !rd %ccr,%o0 |
47 cmp %o0,0x99 | 48 cmp %o0,0x99 |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
159 clr %l5 | 160 clr %l5 |
160 clr %l6 | 161 clr %l6 |
161 clr %l7 | 162 clr %l7 |
162 add %o0,1,%i0 ! used for debugging | 163 add %o0,1,%i0 ! used for debugging |
163 2: ret | 164 2: ret |
164 restore | 165 restore |
165 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | 166 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
166 | 167 |
167 .global OPENSSL_atomic_add | 168 .global OPENSSL_atomic_add |
168 .type OPENSSL_atomic_add,#function | 169 .type OPENSSL_atomic_add,#function |
| 170 .align 32 |
169 OPENSSL_atomic_add: | 171 OPENSSL_atomic_add: |
170 #ifndef ABI64 | 172 #ifndef ABI64 |
171 subcc %g0,1,%o2 | 173 subcc %g0,1,%o2 |
172 .word 0x95408000 !rd %ccr,%o2, see comment above | 174 .word 0x95408000 !rd %ccr,%o2, see comment above |
173 cmp %o2,0x99 | 175 cmp %o2,0x99 |
174 be .v9 | 176 be .v9 |
175 nop | 177 nop |
176 save %sp,FRAME,%sp | 178 save %sp,FRAME,%sp |
177 ba .enter | 179 ba .enter |
178 nop | 180 nop |
179 #ifdef __sun | 181 #ifdef __sun |
180 ! Note that you don't have to link with libthread to call thr_yield, | 182 ! Note that you do not have to link with libthread to call thr_yield, |
181 ! as libc provides a stub, which is overloaded the moment you link | 183 ! as libc provides a stub, which is overloaded the moment you link |
182 ! with *either* libpthread or libthread... | 184 ! with *either* libpthread or libthread... |
183 #define YIELD_CPU thr_yield | 185 #define YIELD_CPU thr_yield |
184 #else | 186 #else |
185 ! applies at least to Linux and FreeBSD... Feedback expected... | 187 ! applies at least to Linux and FreeBSD... Feedback expected... |
186 #define YIELD_CPU sched_yield | 188 #define YIELD_CPU sched_yield |
187 #endif | 189 #endif |
188 .spin: call YIELD_CPU | 190 .spin: call YIELD_CPU |
189 nop | 191 nop |
190 .enter: ld [%i0],%i2 | 192 .enter: ld [%i0],%i2 |
(...skipping 15 matching lines...) Expand all Loading... |
206 1: add %o1,%o2,%o3 | 208 1: add %o1,%o2,%o3 |
207 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 | 209 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 |
208 cmp %o2,%o3 | 210 cmp %o2,%o3 |
209 bne 1b | 211 bne 1b |
210 mov %o3,%o2 ! cas is always fetching to dest. register | 212 mov %o3,%o2 ! cas is always fetching to dest. register |
211 add %o1,%o2,%o0 ! OpenSSL expects the new value | 213 add %o1,%o2,%o0 ! OpenSSL expects the new value |
212 retl | 214 retl |
213 sra %o0,%g0,%o0 ! we return signed int, remember? | 215 sra %o0,%g0,%o0 ! we return signed int, remember? |
214 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | 216 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add |
215 | 217 |
216 .global»OPENSSL_rdtsc | 218 .global»_sparcv9_rdtick |
| 219 .align» 32 |
| 220 _sparcv9_rdtick: |
217 subcc %g0,1,%o0 | 221 subcc %g0,1,%o0 |
218 .word 0x91408000 !rd %ccr,%o0 | 222 .word 0x91408000 !rd %ccr,%o0 |
219 cmp %o0,0x99 | 223 cmp %o0,0x99 |
220 » bne» .notsc | 224 » bne» .notick |
221 xor %o0,%o0,%o0 | 225 xor %o0,%o0,%o0 |
222 » save» %sp,FRAME-16,%sp | 226 » .word» 0x91410000» !rd» %tick,%o0 |
223 » mov» 513,%o0»» !SI_PLATFORM | 227 » retl |
224 » add» %sp,BIAS+16,%o1 | 228 » .word» 0x93323020» !srlx» %o0,32,%o1 |
225 » call» sysinfo | 229 .notick: |
226 » mov» 256,%o2 | 230 » retl |
| 231 » xor» %o1,%o1,%o1 |
| 232 .type» _sparcv9_rdtick,#function |
| 233 .size» _sparcv9_rdtick,.-_sparcv9_rdtick |
227 | 234 |
228 » add» %sp,BIAS-16,%o1 | 235 .global»_sparcv9_vis1_probe |
229 » ld» [%o1],%l0 | 236 .align» 8 |
230 » ld» [%o1+4],%l1 | 237 _sparcv9_vis1_probe: |
231 » ld» [%o1+8],%l2 | 238 » .word» 0x81b00d80» !fxor» %f0,%f0,%f0 |
232 » mov» %lo('SUNW'),%l3 | 239 » add» %sp,BIAS+2,%o1 |
233 » ret | 240 » retl |
234 » restore | 241 » .word» 0xc19a5a40» !ldda» [%o1]ASI_FP16_P,%f0 |
235 .notsc: | 242 .type» _sparcv9_vis1_probe,#function |
| 243 .size» _sparcv9_vis1_probe,.-_sparcv9_vis1_probe |
| 244 |
| 245 ! Probe and instrument VIS1 instruction. Output is the number of cycles it |
| 246 ! takes to execute rdtick and a pair of VIS1 instructions. As the US-Tx VIS unit |
| 247 ! is slow (documented to be 6 cycles on T2) and the core is in-order |
| 248 ! single-issue, it should be possible to distinguish Tx reliably... |
| 249 ! Observed return values are: |
| 250 ! |
| 251 !» UltraSPARC IIe» » 7 |
| 252 !» UltraSPARC III» » 7 |
| 253 !» UltraSPARC T1» » 24 |
| 254 ! |
| 255 ! Numbers for T2 and SPARC64 V-VII are more than welcome. |
| 256 ! |
| 257 ! It would be possible to detect specifically US-T1 by instrumenting |
| 258 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite |
| 259 ! a lot of %tick-s, couple of thousand on Linux... |
| 260 .global»_sparcv9_vis1_instrument |
| 261 .align» 8 |
| 262 _sparcv9_vis1_instrument: |
| 263 » .word» 0x91410000» !rd» %tick,%o0 |
| 264 » .word» 0x81b00d80» !fxor» %f0,%f0,%f0 |
| 265 » .word» 0x85b08d82» !fxor» %f2,%f2,%f2 |
| 266 » .word» 0x93410000» !rd» %tick,%o1 |
| 267 » .word» 0x81b00d80» !fxor» %f0,%f0,%f0 |
| 268 » .word» 0x85b08d82» !fxor» %f2,%f2,%f2 |
| 269 » .word» 0x95410000» !rd» %tick,%o2 |
| 270 » .word» 0x81b00d80» !fxor» %f0,%f0,%f0 |
| 271 » .word» 0x85b08d82» !fxor» %f2,%f2,%f2 |
| 272 » .word» 0x97410000» !rd» %tick,%o3 |
| 273 » .word» 0x81b00d80» !fxor» %f0,%f0,%f0 |
| 274 » .word» 0x85b08d82» !fxor» %f2,%f2,%f2 |
| 275 » .word» 0x99410000» !rd» %tick,%o4 |
| 276 |
| 277 » ! calculate intervals |
| 278 » sub» %o1,%o0,%o0 |
| 279 » sub» %o2,%o1,%o1 |
| 280 » sub» %o3,%o2,%o2 |
| 281 » sub» %o4,%o3,%o3 |
| 282 |
| 283 » ! find minimum value |
| 284 » cmp» %o0,%o1 |
| 285 » .word» 0x38680002» !bgu,a» %xcc,.+8 |
| 286 » mov» %o1,%o0 |
| 287 » cmp» %o0,%o2 |
| 288 » .word» 0x38680002» !bgu,a» %xcc,.+8 |
| 289 » mov» %o2,%o0 |
| 290 » cmp» %o0,%o3 |
| 291 » .word» 0x38680002» !bgu,a» %xcc,.+8 |
| 292 » mov» %o3,%o0 |
| 293 |
236 retl | 294 retl |
237 nop | 295 nop |
238 .type» OPENSSL_rdtsc,#function | 296 .type» _sparcv9_vis1_instrument,#function |
239 .size» OPENSSL_rdtsc,.-OPENSSL_atomic_add | 297 .size» _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument |
| 298 |
| 299 .global»_sparcv9_vis2_probe |
| 300 .align» 8 |
| 301 _sparcv9_vis2_probe: |
| 302 » retl |
| 303 » .word» 0x81b00980» !bshuffle» %f0,%f0,%f0 |
| 304 .type» _sparcv9_vis2_probe,#function |
| 305 .size» _sparcv9_vis2_probe,.-_sparcv9_vis2_probe |
| 306 |
| 307 .global»_sparcv9_fmadd_probe |
| 308 .align» 8 |
| 309 _sparcv9_fmadd_probe: |
| 310 » .word» 0x81b00d80» !fxor» %f0,%f0,%f0 |
| 311 » .word» 0x85b08d82» !fxor» %f2,%f2,%f2 |
| 312 » retl |
| 313 » .word» 0x81b80440» !fmaddd»%f0,%f0,%f2,%f0 |
| 314 .type» _sparcv9_fmadd_probe,#function |
| 315 .size» _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe |
| 316 |
| 317 .global»OPENSSL_cleanse |
| 318 .align» 32 |
| 319 OPENSSL_cleanse: |
| 320 » cmp» %o1,14 |
| 321 » nop |
| 322 #ifdef ABI64 |
| 323 » bgu» %xcc,.Lot |
| 324 #else |
| 325 » bgu» .Lot |
| 326 #endif |
| 327 » cmp» %o1,0 |
| 328 » bne» .Little |
| 329 » nop |
| 330 » retl |
| 331 » nop |
| 332 |
| 333 .Little: |
| 334 » stb» %g0,[%o0] |
| 335 » subcc» %o1,1,%o1 |
| 336 » bnz» .Little |
| 337 » add» %o0,1,%o0 |
| 338 » retl |
| 339 » nop |
| 340 .align» 32 |
| 341 .Lot: |
| 342 #ifndef ABI64 |
| 343 » subcc» %g0,1,%g1 |
| 344 » ! see above for explanation |
| 345 » .word» 0x83408000» !rd» %ccr,%g1 |
| 346 » cmp» %g1,0x99 |
| 347 » bne» .v8lot |
| 348 » nop |
| 349 #endif |
| 350 |
| 351 .v9lot:»andcc» %o0,7,%g0 |
| 352 » bz» .v9aligned |
| 353 » nop |
| 354 » stb» %g0,[%o0] |
| 355 » sub» %o1,1,%o1 |
| 356 » ba» .v9lot |
| 357 » add» %o0,1,%o0 |
| 358 .align» 16,0x01000000 |
| 359 .v9aligned: |
| 360 » .word» 0xc0720000» !stx» %g0,[%o0] |
| 361 » sub» %o1,8,%o1 |
| 362 » andcc» %o1,-8,%g0 |
| 363 #ifdef ABI64 |
| 364 » .word» 0x126ffffd» !bnz» %xcc,.v9aligned |
| 365 #else |
| 366 » .word» 0x124ffffd» !bnz» %icc,.v9aligned |
| 367 #endif |
| 368 » add» %o0,8,%o0 |
| 369 |
| 370 » cmp» %o1,0 |
| 371 » bne» .Little |
| 372 » nop |
| 373 » retl |
| 374 » nop |
| 375 #ifndef ABI64 |
| 376 .v8lot:»andcc» %o0,3,%g0 |
| 377 » bz» .v8aligned |
| 378 » nop |
| 379 » stb» %g0,[%o0] |
| 380 » sub» %o1,1,%o1 |
| 381 » ba» .v8lot |
| 382 » add» %o0,1,%o0 |
| 383 » nop |
| 384 .v8aligned: |
| 385 » st» %g0,[%o0] |
| 386 » sub» %o1,4,%o1 |
| 387 » andcc» %o1,-4,%g0 |
| 388 » bnz» .v8aligned |
| 389 » add» %o0,4,%o0 |
| 390 |
| 391 » cmp» %o1,0 |
| 392 » bne» .Little |
| 393 » nop |
| 394 » retl |
| 395 » nop |
| 396 #endif |
| 397 .type» OPENSSL_cleanse,#function |
| 398 .size» OPENSSL_cleanse,.-OPENSSL_cleanse |
| 399 |
| 400 .section» ".init",#alloc,#execinstr |
| 401 » call» OPENSSL_cpuid_setup |
| 402 » nop |
OLD | NEW |