OLD | NEW |
1 #!/usr/bin/env perl | 1 #!/usr/bin/env perl |
2 | 2 |
3 $output=shift; | 3 $flavour = shift; |
4 $masm=1 if ($output =~ /\.asm/); | 4 $output = shift; |
5 open STDOUT,">$output" || die "can't open $output: $!"; | 5 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } |
6 | 6 |
7 print<<___ if(defined($masm)); | 7 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
8 _TEXT» SEGMENT | |
9 PUBLIC» OPENSSL_rdtsc | |
10 | 8 |
11 PUBLIC» OPENSSL_atomic_add | 9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
12 ALIGN» 16 | 10 open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; |
13 OPENSSL_atomic_add» PROC | |
14 » mov» eax,DWORD PTR[rcx] | |
15 \$Lspin:» lea» r8,DWORD PTR[rdx+rax] | |
16 lock» cmpxchg»DWORD PTR[rcx],r8d | |
17 » jne» \$Lspin | |
18 » mov» eax,r8d | |
19 » cdqe | |
20 » ret | |
21 OPENSSL_atomic_add» ENDP | |
22 | 11 |
23 PUBLIC» OPENSSL_wipe_cpu | 12 if ($win64)» { $arg1="%rcx"; $arg2="%rdx"; } |
24 ALIGN» 16 | 13 else» » { $arg1="%rdi"; $arg2="%rsi"; } |
25 OPENSSL_wipe_cpu» PROC | 14 print<<___; |
26 » pxor» xmm0,xmm0 | 15 .extern»» OPENSSL_cpuid_setup |
27 » pxor» xmm1,xmm1 | 16 .section» .init |
28 » pxor» xmm2,xmm2 | 17 » call» OPENSSL_cpuid_setup |
29 » pxor» xmm3,xmm3 | |
30 » pxor» xmm4,xmm4 | |
31 » pxor» xmm5,xmm5 | |
32 » xor» rcx,rcx | |
33 » xor» rdx,rdx | |
34 » xor» r8,r8 | |
35 » xor» r9,r9 | |
36 » xor» r10,r10 | |
37 » xor» r11,r11 | |
38 » lea» rax,QWORD PTR[rsp+8] | |
39 » ret | |
40 OPENSSL_wipe_cpu» ENDP | |
41 _TEXT» ENDS | |
42 | 18 |
43 CRT\$XIU SEGMENT | |
44 EXTRN OPENSSL_cpuid_setup:PROC | |
45 DQ OPENSSL_cpuid_setup | |
46 CRT\$XIU ENDS | |
47 | |
48 ___ | |
49 print<<___ if(!defined($masm)); | |
50 .text | 19 .text |
51 | 20 |
52 .globl OPENSSL_atomic_add | 21 .globl OPENSSL_atomic_add |
53 .type» OPENSSL_atomic_add,\@function | 22 .type» OPENSSL_atomic_add,\@abi-omnipotent |
54 .align 16 | 23 .align 16 |
55 OPENSSL_atomic_add: | 24 OPENSSL_atomic_add: |
56 » movl» (%rdi),%eax | 25 » movl» ($arg1),%eax |
57 .Lspin:»leaq» (%rsi,%rax),%r8 | 26 .Lspin:»leaq» ($arg2,%rax),%r8 |
58 lock;» cmpxchgl» %r8d,(%rdi) | 27 » .byte» 0xf0» » # lock |
| 28 » cmpxchgl» %r8d,($arg1) |
59 jne .Lspin | 29 jne .Lspin |
60 movl %r8d,%eax | 30 movl %r8d,%eax |
61 » .byte» 0x48,0x98 | 31 » .byte» 0x48,0x98» # cltq/cdqe |
62 ret | 32 ret |
63 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | 33 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add |
64 | 34 |
65 .globl OPENSSL_wipe_cpu | |
66 .type OPENSSL_wipe_cpu,\@function | |
67 .align 16 | |
68 OPENSSL_wipe_cpu: | |
69 pxor %xmm0,%xmm0 | |
70 pxor %xmm1,%xmm1 | |
71 pxor %xmm2,%xmm2 | |
72 pxor %xmm3,%xmm3 | |
73 pxor %xmm4,%xmm4 | |
74 pxor %xmm5,%xmm5 | |
75 pxor %xmm6,%xmm6 | |
76 pxor %xmm7,%xmm7 | |
77 pxor %xmm8,%xmm8 | |
78 pxor %xmm9,%xmm9 | |
79 pxor %xmm10,%xmm10 | |
80 pxor %xmm11,%xmm11 | |
81 pxor %xmm12,%xmm12 | |
82 pxor %xmm13,%xmm13 | |
83 pxor %xmm14,%xmm14 | |
84 pxor %xmm15,%xmm15 | |
85 xorq %rcx,%rcx | |
86 xorq %rdx,%rdx | |
87 xorq %rsi,%rsi | |
88 xorq %rdi,%rdi | |
89 xorq %r8,%r8 | |
90 xorq %r9,%r9 | |
91 xorq %r10,%r10 | |
92 xorq %r11,%r11 | |
93 leaq 8(%rsp),%rax | |
94 ret | |
95 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | |
96 | |
97 .section .init | |
98 call OPENSSL_cpuid_setup | |
99 | |
100 ___ | |
101 | |
102 open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output"; | |
103 print<<___; | |
104 .text | |
105 | |
106 .globl OPENSSL_rdtsc | 35 .globl OPENSSL_rdtsc |
107 .type OPENSSL_rdtsc,\@abi-omnipotent | 36 .type OPENSSL_rdtsc,\@abi-omnipotent |
108 .align 16 | 37 .align 16 |
109 OPENSSL_rdtsc: | 38 OPENSSL_rdtsc: |
110 rdtsc | 39 rdtsc |
111 shl \$32,%rdx | 40 shl \$32,%rdx |
112 or %rdx,%rax | 41 or %rdx,%rax |
113 ret | 42 ret |
114 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc | 43 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc |
115 | 44 |
116 .globl OPENSSL_ia32_cpuid | 45 .globl OPENSSL_ia32_cpuid |
117 .type OPENSSL_ia32_cpuid,\@abi-omnipotent | 46 .type OPENSSL_ia32_cpuid,\@abi-omnipotent |
118 .align 16 | 47 .align 16 |
119 OPENSSL_ia32_cpuid: | 48 OPENSSL_ia32_cpuid: |
120 mov %rbx,%r8 | 49 mov %rbx,%r8 |
121 | 50 |
122 xor %eax,%eax | 51 xor %eax,%eax |
123 cpuid | 52 cpuid |
| 53 mov %eax,%r11d # max value for standard query level |
| 54 |
124 xor %eax,%eax | 55 xor %eax,%eax |
125 cmp \$0x756e6547,%ebx # "Genu" | 56 cmp \$0x756e6547,%ebx # "Genu" |
126 setne %al | 57 setne %al |
127 mov %eax,%r9d | 58 mov %eax,%r9d |
128 cmp \$0x49656e69,%edx # "ineI" | 59 cmp \$0x49656e69,%edx # "ineI" |
129 setne %al | 60 setne %al |
130 or %eax,%r9d | 61 or %eax,%r9d |
131 cmp \$0x6c65746e,%ecx # "ntel" | 62 cmp \$0x6c65746e,%ecx # "ntel" |
132 setne %al | 63 setne %al |
133 » or» %eax,%r9d | 64 » or» %eax,%r9d» » # 0 indicates Intel CPU |
| 65 » jz» .Lintel |
134 | 66 |
| 67 cmp \$0x68747541,%ebx # "Auth" |
| 68 setne %al |
| 69 mov %eax,%r10d |
| 70 cmp \$0x69746E65,%edx # "enti" |
| 71 setne %al |
| 72 or %eax,%r10d |
| 73 cmp \$0x444D4163,%ecx # "cAMD" |
| 74 setne %al |
| 75 or %eax,%r10d # 0 indicates AMD CPU |
| 76 jnz .Lintel |
| 77 |
| 78 # AMD specific |
| 79 mov \$0x80000000,%eax |
| 80 cpuid |
| 81 cmp \$0x80000008,%eax |
| 82 jb .Lintel |
| 83 |
| 84 mov \$0x80000008,%eax |
| 85 cpuid |
| 86 movzb %cl,%r10 # number of cores - 1 |
| 87 inc %r10 # number of cores |
| 88 |
| 89 mov \$1,%eax |
| 90 cpuid |
| 91 bt \$28,%edx # test hyper-threading bit |
| 92 jnc .Ldone |
| 93 shr \$16,%ebx # number of logical processors |
| 94 cmp %r10b,%bl |
| 95 ja .Ldone |
| 96 and \$0xefffffff,%edx # ~(1<<28) |
| 97 jmp .Ldone |
| 98 |
| 99 .Lintel: |
| 100 cmp \$4,%r11d |
| 101 mov \$-1,%r10d |
| 102 jb .Lnocacheinfo |
| 103 |
| 104 mov \$4,%eax |
| 105 mov \$0,%ecx # query L1D |
| 106 cpuid |
| 107 mov %eax,%r10d |
| 108 shr \$14,%r10d |
| 109 and \$0xfff,%r10d # number of cores -1 per L1D |
| 110 |
| 111 .Lnocacheinfo: |
135 mov \$1,%eax | 112 mov \$1,%eax |
136 cpuid | 113 cpuid |
137 cmp \$0,%r9d | 114 cmp \$0,%r9d |
138 jne .Lnotintel | 115 jne .Lnotintel |
139 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR | 116 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR |
140 and \$15,%ah | 117 and \$15,%ah |
141 cmp \$15,%ah # examine Family ID | 118 cmp \$15,%ah # examine Family ID |
142 je .Lnotintel | 119 je .Lnotintel |
143 or \$0x40000000,%edx # use reserved bit to skip unrolled loop | 120 or \$0x40000000,%edx # use reserved bit to skip unrolled loop |
144 .Lnotintel: | 121 .Lnotintel: |
145 bt \$28,%edx # test hyper-threading bit | 122 bt \$28,%edx # test hyper-threading bit |
146 jnc .Ldone | 123 jnc .Ldone |
| 124 and \$0xefffffff,%edx # ~(1<<28) |
| 125 cmp \$0,%r10d |
| 126 je .Ldone |
| 127 |
| 128 or \$0x10000000,%edx # 1<<28 |
147 shr \$16,%ebx | 129 shr \$16,%ebx |
148 cmp \$1,%bl # see if cache is shared | 130 cmp \$1,%bl # see if cache is shared |
149 ja .Ldone | 131 ja .Ldone |
150 and \$0xefffffff,%edx # ~(1<<28) | 132 and \$0xefffffff,%edx # ~(1<<28) |
151 .Ldone: | 133 .Ldone: |
152 shl \$32,%rcx | 134 shl \$32,%rcx |
153 mov %edx,%eax | 135 mov %edx,%eax |
154 mov %r8,%rbx | 136 mov %r8,%rbx |
155 or %rcx,%rax | 137 or %rcx,%rax |
156 ret | 138 ret |
157 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid | 139 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid |
| 140 |
| 141 .globl OPENSSL_cleanse |
| 142 .type OPENSSL_cleanse,\@abi-omnipotent |
| 143 .align 16 |
| 144 OPENSSL_cleanse: |
| 145 xor %rax,%rax |
| 146 cmp \$15,$arg2 |
| 147 jae .Lot |
| 148 cmp \$0,$arg2 |
| 149 je .Lret |
| 150 .Little: |
| 151 mov %al,($arg1) |
| 152 sub \$1,$arg2 |
| 153 lea 1($arg1),$arg1 |
| 154 jnz .Little |
| 155 .Lret: |
| 156 ret |
| 157 .align 16 |
| 158 .Lot: |
| 159 test \$7,$arg1 |
| 160 jz .Laligned |
| 161 mov %al,($arg1) |
| 162 lea -1($arg2),$arg2 |
| 163 lea 1($arg1),$arg1 |
| 164 jmp .Lot |
| 165 .Laligned: |
| 166 mov %rax,($arg1) |
| 167 lea -8($arg2),$arg2 |
| 168 test \$-8,$arg2 |
| 169 lea 8($arg1),$arg1 |
| 170 jnz .Laligned |
| 171 cmp \$0,$arg2 |
| 172 jne .Little |
| 173 ret |
| 174 .size OPENSSL_cleanse,.-OPENSSL_cleanse |
158 ___ | 175 ___ |
| 176 |
| 177 print<<___ if (!$win64); |
| 178 .globl OPENSSL_wipe_cpu |
| 179 .type OPENSSL_wipe_cpu,\@abi-omnipotent |
| 180 .align 16 |
| 181 OPENSSL_wipe_cpu: |
| 182 pxor %xmm0,%xmm0 |
| 183 pxor %xmm1,%xmm1 |
| 184 pxor %xmm2,%xmm2 |
| 185 pxor %xmm3,%xmm3 |
| 186 pxor %xmm4,%xmm4 |
| 187 pxor %xmm5,%xmm5 |
| 188 pxor %xmm6,%xmm6 |
| 189 pxor %xmm7,%xmm7 |
| 190 pxor %xmm8,%xmm8 |
| 191 pxor %xmm9,%xmm9 |
| 192 pxor %xmm10,%xmm10 |
| 193 pxor %xmm11,%xmm11 |
| 194 pxor %xmm12,%xmm12 |
| 195 pxor %xmm13,%xmm13 |
| 196 pxor %xmm14,%xmm14 |
| 197 pxor %xmm15,%xmm15 |
| 198 xorq %rcx,%rcx |
| 199 xorq %rdx,%rdx |
| 200 xorq %rsi,%rsi |
| 201 xorq %rdi,%rdi |
| 202 xorq %r8,%r8 |
| 203 xorq %r9,%r9 |
| 204 xorq %r10,%r10 |
| 205 xorq %r11,%r11 |
| 206 leaq 8(%rsp),%rax |
| 207 ret |
| 208 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
| 209 ___ |
| 210 print<<___ if ($win64); |
| 211 .globl OPENSSL_wipe_cpu |
| 212 .type OPENSSL_wipe_cpu,\@abi-omnipotent |
| 213 .align 16 |
| 214 OPENSSL_wipe_cpu: |
| 215 pxor %xmm0,%xmm0 |
| 216 pxor %xmm1,%xmm1 |
| 217 pxor %xmm2,%xmm2 |
| 218 pxor %xmm3,%xmm3 |
| 219 pxor %xmm4,%xmm4 |
| 220 pxor %xmm5,%xmm5 |
| 221 xorq %rcx,%rcx |
| 222 xorq %rdx,%rdx |
| 223 xorq %r8,%r8 |
| 224 xorq %r9,%r9 |
| 225 xorq %r10,%r10 |
| 226 xorq %r11,%r11 |
| 227 leaq 8(%rsp),%rax |
| 228 ret |
| 229 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
| 230 ___ |
| 231 |
159 close STDOUT; # flush | 232 close STDOUT; # flush |
OLD | NEW |