Index: openssl/crypto/sparccpuid.S |
=================================================================== |
--- openssl/crypto/sparccpuid.S (revision 105093) |
+++ openssl/crypto/sparccpuid.S (working copy) |
@@ -34,7 +34,8 @@ |
nop |
call .PIC.zero.up |
mov .zero-(.-4),%o0 |
- ldd [%o0],%f0 |
+ ld [%o0],%f0 |
+ ld [%o0],%f1 |
subcc %g0,1,%o0 |
! Following is V9 "rd %ccr,%o0" instruction. However! V8 |
@@ -166,6 +167,7 @@ |
.global OPENSSL_atomic_add |
.type OPENSSL_atomic_add,#function |
+.align 32 |
OPENSSL_atomic_add: |
#ifndef ABI64 |
subcc %g0,1,%o2 |
@@ -177,7 +179,7 @@ |
ba .enter |
nop |
#ifdef __sun |
-! Note that you don't have to link with libthread to call thr_yield, |
+! Note that you do not have to link with libthread to call thr_yield, |
! as libc provides a stub, which is overloaded the moment you link |
! with *either* libpthread or libthread... |
#define YIELD_CPU thr_yield |
@@ -213,27 +215,188 @@ |
sra %o0,%g0,%o0 ! we return signed int, remember? |
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add |
-.global OPENSSL_rdtsc |
+.global _sparcv9_rdtick |
+.align 32 |
+_sparcv9_rdtick: |
subcc %g0,1,%o0 |
.word 0x91408000 !rd %ccr,%o0 |
cmp %o0,0x99 |
- bne .notsc |
+ bne .notick |
xor %o0,%o0,%o0 |
- save %sp,FRAME-16,%sp |
- mov 513,%o0 !SI_PLATFORM |
- add %sp,BIAS+16,%o1 |
- call sysinfo |
- mov 256,%o2 |
+ .word 0x91410000 !rd %tick,%o0 |
+ retl |
+ .word 0x93323020 !srlx %o0,32,%o1 |
+.notick: |
+ retl |
+ xor %o1,%o1,%o1 |
+.type _sparcv9_rdtick,#function |
+.size _sparcv9_rdtick,.-_sparcv9_rdtick |
- add %sp,BIAS-16,%o1 |
- ld [%o1],%l0 |
- ld [%o1+4],%l1 |
- ld [%o1+8],%l2 |
- mov %lo('SUNW'),%l3 |
- ret |
- restore |
-.notsc: |
+.global _sparcv9_vis1_probe |
+.align 8 |
+_sparcv9_vis1_probe: |
+ .word 0x81b00d80 !fxor %f0,%f0,%f0 |
+ add %sp,BIAS+2,%o1 |
retl |
+ .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 |
+.type _sparcv9_vis1_probe,#function |
+.size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe |
+ |
+! Probe and instrument a VIS1 instruction. Output is the number of cycles |
+! it takes to execute rd %tick and a pair of VIS1 instructions. Because the |
+! US-Tx VIS unit is slow (documented to be 6 cycles on T2) and the core is |
+! in-order single-issue, it should be possible to distinguish Tx reliably... |
+! Observed return values are: |
+! |
+! UltraSPARC IIe 7 |
+! UltraSPARC III 7 |
+! UltraSPARC T1 24 |
+! |
+! Numbers for T2 and SPARC64 V-VII are more than welcome. |
+! |
+! It would be possible to detect US-T1 specifically by instrumenting |
+! fmul8ulx16, which is emulated on T1 and as such accounts for quite |
+! a lot of %tick-s (a couple of thousand on Linux)... |
+.global _sparcv9_vis1_instrument |
+.align 8 |
+_sparcv9_vis1_instrument: |
+ .word 0x91410000 !rd %tick,%o0 |
+ .word 0x81b00d80 !fxor %f0,%f0,%f0 |
+ .word 0x85b08d82 !fxor %f2,%f2,%f2 |
+ .word 0x93410000 !rd %tick,%o1 |
+ .word 0x81b00d80 !fxor %f0,%f0,%f0 |
+ .word 0x85b08d82 !fxor %f2,%f2,%f2 |
+ .word 0x95410000 !rd %tick,%o2 |
+ .word 0x81b00d80 !fxor %f0,%f0,%f0 |
+ .word 0x85b08d82 !fxor %f2,%f2,%f2 |
+ .word 0x97410000 !rd %tick,%o3 |
+ .word 0x81b00d80 !fxor %f0,%f0,%f0 |
+ .word 0x85b08d82 !fxor %f2,%f2,%f2 |
+ .word 0x99410000 !rd %tick,%o4 |
+ |
+ ! calculate intervals |
+ sub %o1,%o0,%o0 |
+ sub %o2,%o1,%o1 |
+ sub %o3,%o2,%o2 |
+ sub %o4,%o3,%o3 |
+ |
+ ! find minimum value |
+ cmp %o0,%o1 |
+ .word 0x38680002 !bgu,a %xcc,.+8 |
+ mov %o1,%o0 |
+ cmp %o0,%o2 |
+ .word 0x38680002 !bgu,a %xcc,.+8 |
+ mov %o2,%o0 |
+ cmp %o0,%o3 |
+ .word 0x38680002 !bgu,a %xcc,.+8 |
+ mov %o3,%o0 |
+ |
+ retl |
nop |
-.type OPENSSL_rdtsc,#function |
-.size OPENSSL_rdtsc,.-OPENSSL_atomic_add |
+.type _sparcv9_vis1_instrument,#function |
+.size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument |
+ |
+.global _sparcv9_vis2_probe |
+.align 8 |
+_sparcv9_vis2_probe: |
+ retl |
+ .word 0x81b00980 !bshuffle %f0,%f0,%f0 |
+.type _sparcv9_vis2_probe,#function |
+.size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe |
+ |
+.global _sparcv9_fmadd_probe |
+.align 8 |
+_sparcv9_fmadd_probe: |
+ .word 0x81b00d80 !fxor %f0,%f0,%f0 |
+ .word 0x85b08d82 !fxor %f2,%f2,%f2 |
+ retl |
+ .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 |
+.type _sparcv9_fmadd_probe,#function |
+.size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe |
+ |
+.global OPENSSL_cleanse |
+.align 32 |
+OPENSSL_cleanse: |
+ cmp %o1,14 |
+ nop |
+#ifdef ABI64 |
+ bgu %xcc,.Lot |
+#else |
+ bgu .Lot |
+#endif |
+ cmp %o1,0 |
+ bne .Little |
+ nop |
+ retl |
+ nop |
+ |
+.Little: |
+ stb %g0,[%o0] |
+ subcc %o1,1,%o1 |
+ bnz .Little |
+ add %o0,1,%o0 |
+ retl |
+ nop |
+.align 32 |
+.Lot: |
+#ifndef ABI64 |
+ subcc %g0,1,%g1 |
+ ! see above for explanation |
+ .word 0x83408000 !rd %ccr,%g1 |
+ cmp %g1,0x99 |
+ bne .v8lot |
+ nop |
+#endif |
+ |
+.v9lot: andcc %o0,7,%g0 |
+ bz .v9aligned |
+ nop |
+ stb %g0,[%o0] |
+ sub %o1,1,%o1 |
+ ba .v9lot |
+ add %o0,1,%o0 |
+.align 16,0x01000000 |
+.v9aligned: |
+ .word 0xc0720000 !stx %g0,[%o0] |
+ sub %o1,8,%o1 |
+ andcc %o1,-8,%g0 |
+#ifdef ABI64 |
+ .word 0x126ffffd !bnz %xcc,.v9aligned |
+#else |
+ .word 0x124ffffd !bnz %icc,.v9aligned |
+#endif |
+ add %o0,8,%o0 |
+ |
+ cmp %o1,0 |
+ bne .Little |
+ nop |
+ retl |
+ nop |
+#ifndef ABI64 |
+.v8lot: andcc %o0,3,%g0 |
+ bz .v8aligned |
+ nop |
+ stb %g0,[%o0] |
+ sub %o1,1,%o1 |
+ ba .v8lot |
+ add %o0,1,%o0 |
+ nop |
+.v8aligned: |
+ st %g0,[%o0] |
+ sub %o1,4,%o1 |
+ andcc %o1,-4,%g0 |
+ bnz .v8aligned |
+ add %o0,4,%o0 |
+ |
+ cmp %o1,0 |
+ bne .Little |
+ nop |
+ retl |
+ nop |
+#endif |
+.type OPENSSL_cleanse,#function |
+.size OPENSSL_cleanse,.-OPENSSL_cleanse |
+ |
+.section ".init",#alloc,#execinstr |
+ call OPENSSL_cpuid_setup |
+ nop |