| OLD | NEW |
| 1 #!/usr/bin/env perl | 1 #!/usr/bin/env perl |
| 2 | 2 |
| 3 $output=shift; | 3 $flavour = shift; |
| 4 $masm=1 if ($output =~ /\.asm/); | 4 $output = shift; |
| 5 open STDOUT,">$output" || die "can't open $output: $!"; | 5 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } |
| 6 | 6 |
| 7 print<<___ if(defined($masm)); | 7 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
| 8 _TEXT» SEGMENT | |
| 9 PUBLIC» OPENSSL_rdtsc | |
| 10 | 8 |
| 11 PUBLIC» OPENSSL_atomic_add | 9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 12 ALIGN» 16 | 10 open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; |
| 13 OPENSSL_atomic_add» PROC | |
| 14 » mov» eax,DWORD PTR[rcx] | |
| 15 \$Lspin:» lea» r8,DWORD PTR[rdx+rax] | |
| 16 lock» cmpxchg»DWORD PTR[rcx],r8d | |
| 17 » jne» \$Lspin | |
| 18 » mov» eax,r8d | |
| 19 » cdqe | |
| 20 » ret | |
| 21 OPENSSL_atomic_add» ENDP | |
| 22 | 11 |
| 23 PUBLIC» OPENSSL_wipe_cpu | 12 if ($win64)» { $arg1="%rcx"; $arg2="%rdx"; } |
| 24 ALIGN» 16 | 13 else» » { $arg1="%rdi"; $arg2="%rsi"; } |
| 25 OPENSSL_wipe_cpu» PROC | 14 print<<___; |
| 26 » pxor» xmm0,xmm0 | 15 .extern»» OPENSSL_cpuid_setup |
| 27 » pxor» xmm1,xmm1 | 16 .section» .init |
| 28 » pxor» xmm2,xmm2 | 17 » call» OPENSSL_cpuid_setup |
| 29 » pxor» xmm3,xmm3 | |
| 30 » pxor» xmm4,xmm4 | |
| 31 » pxor» xmm5,xmm5 | |
| 32 » xor» rcx,rcx | |
| 33 » xor» rdx,rdx | |
| 34 » xor» r8,r8 | |
| 35 » xor» r9,r9 | |
| 36 » xor» r10,r10 | |
| 37 » xor» r11,r11 | |
| 38 » lea» rax,QWORD PTR[rsp+8] | |
| 39 » ret | |
| 40 OPENSSL_wipe_cpu» ENDP | |
| 41 _TEXT» ENDS | |
| 42 | 18 |
| 43 CRT\$XIU SEGMENT | |
| 44 EXTRN OPENSSL_cpuid_setup:PROC | |
| 45 DQ OPENSSL_cpuid_setup | |
| 46 CRT\$XIU ENDS | |
| 47 | |
| 48 ___ | |
| 49 print<<___ if(!defined($masm)); | |
| 50 .text | 19 .text |
| 51 | 20 |
| 52 .globl OPENSSL_atomic_add | 21 .globl OPENSSL_atomic_add |
| 53 .type» OPENSSL_atomic_add,\@function | 22 .type» OPENSSL_atomic_add,\@abi-omnipotent |
| 54 .align 16 | 23 .align 16 |
| 55 OPENSSL_atomic_add: | 24 OPENSSL_atomic_add: |
| 56 » movl» (%rdi),%eax | 25 » movl» ($arg1),%eax |
| 57 .Lspin:»leaq» (%rsi,%rax),%r8 | 26 .Lspin:»leaq» ($arg2,%rax),%r8 |
| 58 lock;» cmpxchgl» %r8d,(%rdi) | 27 » .byte» 0xf0» » # lock |
| 28 » cmpxchgl» %r8d,($arg1) |
| 59 jne .Lspin | 29 jne .Lspin |
| 60 movl %r8d,%eax | 30 movl %r8d,%eax |
| 61 » .byte» 0x48,0x98 | 31 » .byte» 0x48,0x98» # cltq/cdqe |
| 62 ret | 32 ret |
| 63 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | 33 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add |
| 64 | 34 |
| 65 .globl OPENSSL_wipe_cpu | |
| 66 .type OPENSSL_wipe_cpu,\@function | |
| 67 .align 16 | |
| 68 OPENSSL_wipe_cpu: | |
| 69 pxor %xmm0,%xmm0 | |
| 70 pxor %xmm1,%xmm1 | |
| 71 pxor %xmm2,%xmm2 | |
| 72 pxor %xmm3,%xmm3 | |
| 73 pxor %xmm4,%xmm4 | |
| 74 pxor %xmm5,%xmm5 | |
| 75 pxor %xmm6,%xmm6 | |
| 76 pxor %xmm7,%xmm7 | |
| 77 pxor %xmm8,%xmm8 | |
| 78 pxor %xmm9,%xmm9 | |
| 79 pxor %xmm10,%xmm10 | |
| 80 pxor %xmm11,%xmm11 | |
| 81 pxor %xmm12,%xmm12 | |
| 82 pxor %xmm13,%xmm13 | |
| 83 pxor %xmm14,%xmm14 | |
| 84 pxor %xmm15,%xmm15 | |
| 85 xorq %rcx,%rcx | |
| 86 xorq %rdx,%rdx | |
| 87 xorq %rsi,%rsi | |
| 88 xorq %rdi,%rdi | |
| 89 xorq %r8,%r8 | |
| 90 xorq %r9,%r9 | |
| 91 xorq %r10,%r10 | |
| 92 xorq %r11,%r11 | |
| 93 leaq 8(%rsp),%rax | |
| 94 ret | |
| 95 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | |
| 96 | |
| 97 .section .init | |
| 98 call OPENSSL_cpuid_setup | |
| 99 | |
| 100 ___ | |
| 101 | |
| 102 open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output"; | |
| 103 print<<___; | |
| 104 .text | |
| 105 | |
| 106 .globl OPENSSL_rdtsc | 35 .globl OPENSSL_rdtsc |
| 107 .type OPENSSL_rdtsc,\@abi-omnipotent | 36 .type OPENSSL_rdtsc,\@abi-omnipotent |
| 108 .align 16 | 37 .align 16 |
| 109 OPENSSL_rdtsc: | 38 OPENSSL_rdtsc: |
| 110 rdtsc | 39 rdtsc |
| 111 shl \$32,%rdx | 40 shl \$32,%rdx |
| 112 or %rdx,%rax | 41 or %rdx,%rax |
| 113 ret | 42 ret |
| 114 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc | 43 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc |
| 115 | 44 |
| 116 .globl OPENSSL_ia32_cpuid | 45 .globl OPENSSL_ia32_cpuid |
| 117 .type OPENSSL_ia32_cpuid,\@abi-omnipotent | 46 .type OPENSSL_ia32_cpuid,\@abi-omnipotent |
| 118 .align 16 | 47 .align 16 |
| 119 OPENSSL_ia32_cpuid: | 48 OPENSSL_ia32_cpuid: |
| 120 mov %rbx,%r8 | 49 mov %rbx,%r8 |
| 121 | 50 |
| 122 xor %eax,%eax | 51 xor %eax,%eax |
| 123 cpuid | 52 cpuid |
| 53 mov %eax,%r11d # max value for standard query level |
| 54 |
| 124 xor %eax,%eax | 55 xor %eax,%eax |
| 125 cmp \$0x756e6547,%ebx # "Genu" | 56 cmp \$0x756e6547,%ebx # "Genu" |
| 126 setne %al | 57 setne %al |
| 127 mov %eax,%r9d | 58 mov %eax,%r9d |
| 128 cmp \$0x49656e69,%edx # "ineI" | 59 cmp \$0x49656e69,%edx # "ineI" |
| 129 setne %al | 60 setne %al |
| 130 or %eax,%r9d | 61 or %eax,%r9d |
| 131 cmp \$0x6c65746e,%ecx # "ntel" | 62 cmp \$0x6c65746e,%ecx # "ntel" |
| 132 setne %al | 63 setne %al |
| 133 » or» %eax,%r9d | 64 » or» %eax,%r9d» » # 0 indicates Intel CPU |
| 65 » jz» .Lintel |
| 134 | 66 |
| 67 cmp \$0x68747541,%ebx # "Auth" |
| 68 setne %al |
| 69 mov %eax,%r10d |
| 70 cmp \$0x69746E65,%edx # "enti" |
| 71 setne %al |
| 72 or %eax,%r10d |
| 73 cmp \$0x444D4163,%ecx # "cAMD" |
| 74 setne %al |
| 75 or %eax,%r10d # 0 indicates AMD CPU |
| 76 jnz .Lintel |
| 77 |
| 78 # AMD specific |
| 79 mov \$0x80000000,%eax |
| 80 cpuid |
| 81 cmp \$0x80000008,%eax |
| 82 jb .Lintel |
| 83 |
| 84 mov \$0x80000008,%eax |
| 85 cpuid |
| 86 movzb %cl,%r10 # number of cores - 1 |
| 87 inc %r10 # number of cores |
| 88 |
| 89 mov \$1,%eax |
| 90 cpuid |
| 91 bt \$28,%edx # test hyper-threading bit |
| 92 jnc .Ldone |
| 93 shr \$16,%ebx # number of logical processors |
| 94 cmp %r10b,%bl |
| 95 ja .Ldone |
| 96 and \$0xefffffff,%edx # ~(1<<28) |
| 97 jmp .Ldone |
| 98 |
| 99 .Lintel: |
| 100 cmp \$4,%r11d |
| 101 mov \$-1,%r10d |
| 102 jb .Lnocacheinfo |
| 103 |
| 104 mov \$4,%eax |
| 105 mov \$0,%ecx # query L1D |
| 106 cpuid |
| 107 mov %eax,%r10d |
| 108 shr \$14,%r10d |
| 109 and \$0xfff,%r10d # number of cores -1 per L1D |
| 110 |
| 111 .Lnocacheinfo: |
| 135 mov \$1,%eax | 112 mov \$1,%eax |
| 136 cpuid | 113 cpuid |
| 137 cmp \$0,%r9d | 114 cmp \$0,%r9d |
| 138 jne .Lnotintel | 115 jne .Lnotintel |
| 139 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR | 116 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR |
| 140 and \$15,%ah | 117 and \$15,%ah |
| 141 cmp \$15,%ah # examine Family ID | 118 cmp \$15,%ah # examine Family ID |
| 142 je .Lnotintel | 119 je .Lnotintel |
| 143 or \$0x40000000,%edx # use reserved bit to skip unrolled loop | 120 or \$0x40000000,%edx # use reserved bit to skip unrolled loop |
| 144 .Lnotintel: | 121 .Lnotintel: |
| 145 bt \$28,%edx # test hyper-threading bit | 122 bt \$28,%edx # test hyper-threading bit |
| 146 jnc .Ldone | 123 jnc .Ldone |
| 124 and \$0xefffffff,%edx # ~(1<<28) |
| 125 cmp \$0,%r10d |
| 126 je .Ldone |
| 127 |
| 128 or \$0x10000000,%edx # 1<<28 |
| 147 shr \$16,%ebx | 129 shr \$16,%ebx |
| 148 cmp \$1,%bl # see if cache is shared | 130 cmp \$1,%bl # see if cache is shared |
| 149 ja .Ldone | 131 ja .Ldone |
| 150 and \$0xefffffff,%edx # ~(1<<28) | 132 and \$0xefffffff,%edx # ~(1<<28) |
| 151 .Ldone: | 133 .Ldone: |
| 152 shl \$32,%rcx | 134 shl \$32,%rcx |
| 153 mov %edx,%eax | 135 mov %edx,%eax |
| 154 mov %r8,%rbx | 136 mov %r8,%rbx |
| 155 or %rcx,%rax | 137 or %rcx,%rax |
| 156 ret | 138 ret |
| 157 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid | 139 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid |
| 140 |
| 141 .globl OPENSSL_cleanse |
| 142 .type OPENSSL_cleanse,\@abi-omnipotent |
| 143 .align 16 |
| 144 OPENSSL_cleanse: |
| 145 xor %rax,%rax |
| 146 cmp \$15,$arg2 |
| 147 jae .Lot |
| 148 cmp \$0,$arg2 |
| 149 je .Lret |
| 150 .Little: |
| 151 mov %al,($arg1) |
| 152 sub \$1,$arg2 |
| 153 lea 1($arg1),$arg1 |
| 154 jnz .Little |
| 155 .Lret: |
| 156 ret |
| 157 .align 16 |
| 158 .Lot: |
| 159 test \$7,$arg1 |
| 160 jz .Laligned |
| 161 mov %al,($arg1) |
| 162 lea -1($arg2),$arg2 |
| 163 lea 1($arg1),$arg1 |
| 164 jmp .Lot |
| 165 .Laligned: |
| 166 mov %rax,($arg1) |
| 167 lea -8($arg2),$arg2 |
| 168 test \$-8,$arg2 |
| 169 lea 8($arg1),$arg1 |
| 170 jnz .Laligned |
| 171 cmp \$0,$arg2 |
| 172 jne .Little |
| 173 ret |
| 174 .size OPENSSL_cleanse,.-OPENSSL_cleanse |
| 158 ___ | 175 ___ |
| 176 |
| 177 print<<___ if (!$win64); |
| 178 .globl OPENSSL_wipe_cpu |
| 179 .type OPENSSL_wipe_cpu,\@abi-omnipotent |
| 180 .align 16 |
| 181 OPENSSL_wipe_cpu: |
| 182 pxor %xmm0,%xmm0 |
| 183 pxor %xmm1,%xmm1 |
| 184 pxor %xmm2,%xmm2 |
| 185 pxor %xmm3,%xmm3 |
| 186 pxor %xmm4,%xmm4 |
| 187 pxor %xmm5,%xmm5 |
| 188 pxor %xmm6,%xmm6 |
| 189 pxor %xmm7,%xmm7 |
| 190 pxor %xmm8,%xmm8 |
| 191 pxor %xmm9,%xmm9 |
| 192 pxor %xmm10,%xmm10 |
| 193 pxor %xmm11,%xmm11 |
| 194 pxor %xmm12,%xmm12 |
| 195 pxor %xmm13,%xmm13 |
| 196 pxor %xmm14,%xmm14 |
| 197 pxor %xmm15,%xmm15 |
| 198 xorq %rcx,%rcx |
| 199 xorq %rdx,%rdx |
| 200 xorq %rsi,%rsi |
| 201 xorq %rdi,%rdi |
| 202 xorq %r8,%r8 |
| 203 xorq %r9,%r9 |
| 204 xorq %r10,%r10 |
| 205 xorq %r11,%r11 |
| 206 leaq 8(%rsp),%rax |
| 207 ret |
| 208 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
| 209 ___ |
| 210 print<<___ if ($win64); |
| 211 .globl OPENSSL_wipe_cpu |
| 212 .type OPENSSL_wipe_cpu,\@abi-omnipotent |
| 213 .align 16 |
| 214 OPENSSL_wipe_cpu: |
| 215 pxor %xmm0,%xmm0 |
| 216 pxor %xmm1,%xmm1 |
| 217 pxor %xmm2,%xmm2 |
| 218 pxor %xmm3,%xmm3 |
| 219 pxor %xmm4,%xmm4 |
| 220 pxor %xmm5,%xmm5 |
| 221 xorq %rcx,%rcx |
| 222 xorq %rdx,%rdx |
| 223 xorq %r8,%r8 |
| 224 xorq %r9,%r9 |
| 225 xorq %r10,%r10 |
| 226 xorq %r11,%r11 |
| 227 leaq 8(%rsp),%rax |
| 228 ret |
| 229 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
| 230 ___ |
| 231 |
| 159 close STDOUT; # flush | 232 close STDOUT; # flush |
| OLD | NEW |