| Index: openssl/crypto/x86_64cpuid.pl
|
| ===================================================================
|
| --- openssl/crypto/x86_64cpuid.pl (revision 105093)
|
| +++ openssl/crypto/x86_64cpuid.pl (working copy)
|
| @@ -1,108 +1,37 @@
|
| #!/usr/bin/env perl
|
|
|
| -$output=shift;
|
| -$masm=1 if ($output =~ /\.asm/);
|
| -open STDOUT,">$output" || die "can't open $output: $!";
|
| +$flavour = shift; # first argument: flavour, later handed to x86_64-xlate.pl
|
| +$output = shift; # second argument: output file name
|
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } # a first argument containing a dot is taken as the output file name instead, leaving the flavour undefined
|
|
|
| -print<<___ if(defined($masm));
|
| -_TEXT SEGMENT
|
| -PUBLIC OPENSSL_rdtsc
|
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); # Win64 when the flavour contains nasm, masm or mingw64, or the output name ends in .asm
|
|
|
| -PUBLIC OPENSSL_atomic_add
|
| -ALIGN 16
|
| -OPENSSL_atomic_add PROC
|
| - mov eax,DWORD PTR[rcx]
|
| -\$Lspin: lea r8,DWORD PTR[rdx+rax]
|
| -lock cmpxchg DWORD PTR[rcx],r8d
|
| - jne \$Lspin
|
| - mov eax,r8d
|
| - cdqe
|
| - ret
|
| -OPENSSL_atomic_add ENDP
|
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's path, including the trailing separator
|
| +open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output" or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!"; # pipe everything printed below through the perlasm translator; abort if the pipe cannot be started
|
|
|
| -PUBLIC OPENSSL_wipe_cpu
|
| -ALIGN 16
|
| -OPENSSL_wipe_cpu PROC
|
| - pxor xmm0,xmm0
|
| - pxor xmm1,xmm1
|
| - pxor xmm2,xmm2
|
| - pxor xmm3,xmm3
|
| - pxor xmm4,xmm4
|
| - pxor xmm5,xmm5
|
| - xor rcx,rcx
|
| - xor rdx,rdx
|
| - xor r8,r8
|
| - xor r9,r9
|
| - xor r10,r10
|
| - xor r11,r11
|
| - lea rax,QWORD PTR[rsp+8]
|
| - ret
|
| -OPENSSL_wipe_cpu ENDP
|
| -_TEXT ENDS
|
| +if ($win64) { $arg1="%rcx"; $arg2="%rdx"; } # Win64: the routines below read their first two arguments from rcx and rdx
|
| +else { $arg1="%rdi"; $arg2="%rsi"; } # otherwise: the first two arguments are read from rdi and rsi
|
| +print<<___;
|
| +.extern OPENSSL_cpuid_setup
|
| +.section .init
|
| + call OPENSSL_cpuid_setup
|
|
|
| -CRT\$XIU SEGMENT
|
| -EXTRN OPENSSL_cpuid_setup:PROC
|
| -DQ OPENSSL_cpuid_setup
|
| -CRT\$XIU ENDS
|
| -
|
| -___
|
| -print<<___ if(!defined($masm));
|
| .text
|
|
|
| .globl OPENSSL_atomic_add
|
| -.type OPENSSL_atomic_add,\@function
|
| +.type OPENSSL_atomic_add,\@abi-omnipotent
|
| .align 16
|
| OPENSSL_atomic_add:
|
| - movl (%rdi),%eax
|
| -.Lspin: leaq (%rsi,%rax),%r8
|
| -lock; cmpxchgl %r8d,(%rdi)
|
| + movl ($arg1),%eax # eax = current 32-bit value at the address in the first argument
|
| +.Lspin: leaq ($arg2,%rax),%r8 # r8 = second argument + eax, the candidate new value
|
| + .byte 0xf0 # lock
|
| + cmpxchgl %r8d,($arg1) # store r8d if memory still equals eax, otherwise eax is reloaded from memory
|
| jne .Lspin
|
| movl %r8d,%eax
|
| - .byte 0x48,0x98
|
| + .byte 0x48,0x98 # cltq/cdqe
|
| ret
|
| .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
|
|
| -.globl OPENSSL_wipe_cpu
|
| -.type OPENSSL_wipe_cpu,\@function
|
| -.align 16
|
| -OPENSSL_wipe_cpu:
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - pxor %xmm5,%xmm5
|
| - pxor %xmm6,%xmm6
|
| - pxor %xmm7,%xmm7
|
| - pxor %xmm8,%xmm8
|
| - pxor %xmm9,%xmm9
|
| - pxor %xmm10,%xmm10
|
| - pxor %xmm11,%xmm11
|
| - pxor %xmm12,%xmm12
|
| - pxor %xmm13,%xmm13
|
| - pxor %xmm14,%xmm14
|
| - pxor %xmm15,%xmm15
|
| - xorq %rcx,%rcx
|
| - xorq %rdx,%rdx
|
| - xorq %rsi,%rsi
|
| - xorq %rdi,%rdi
|
| - xorq %r8,%r8
|
| - xorq %r9,%r9
|
| - xorq %r10,%r10
|
| - xorq %r11,%r11
|
| - leaq 8(%rsp),%rax
|
| - ret
|
| -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
| -
|
| -.section .init
|
| - call OPENSSL_cpuid_setup
|
| -
|
| -___
|
| -
|
| -open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output";
|
| -print<<___;
|
| -.text
|
| -
|
| .globl OPENSSL_rdtsc
|
| .type OPENSSL_rdtsc,\@abi-omnipotent
|
| .align 16
|
| @@ -121,6 +50,8 @@
|
|
|
| xor %eax,%eax
|
| cpuid
|
| + mov %eax,%r11d # max value for standard query level
|
| +
|
| xor %eax,%eax
|
| cmp \$0x756e6547,%ebx # "Genu"
|
| setne %al
|
| @@ -130,10 +61,56 @@
|
| or %eax,%r9d
|
| cmp \$0x6c65746e,%ecx # "ntel"
|
| setne %al
|
| - or %eax,%r9d
|
| + or %eax,%r9d # 0 indicates Intel CPU
|
| + jz .Lintel
|
|
|
| + cmp \$0x68747541,%ebx # "Auth"
|
| + setne %al
|
| + mov %eax,%r10d
|
| + cmp \$0x69746E65,%edx # "enti"
|
| + setne %al
|
| + or %eax,%r10d
|
| + cmp \$0x444D4163,%ecx # "cAMD"
|
| + setne %al
|
| + or %eax,%r10d # 0 indicates AMD CPU
|
| + jnz .Lintel
|
| +
|
| + # AMD specific
|
| + mov \$0x80000000,%eax
|
| + cpuid
|
| + cmp \$0x80000008,%eax
|
| + jb .Lintel
|
| +
|
| + mov \$0x80000008,%eax
|
| + cpuid
|
| + movzb %cl,%r10 # number of cores - 1
|
| + inc %r10 # number of cores
|
| +
|
| mov \$1,%eax
|
| cpuid
|
| + bt \$28,%edx # test hyper-threading bit
|
| + jnc .Ldone
|
| + shr \$16,%ebx # number of logical processors
|
| + cmp %r10b,%bl
|
| + ja .Ldone
|
| + and \$0xefffffff,%edx # ~(1<<28)
|
| + jmp .Ldone
|
| +
|
| +.Lintel:
|
| + cmp \$4,%r11d
|
| + mov \$-1,%r10d
|
| + jb .Lnocacheinfo
|
| +
|
| + mov \$4,%eax
|
| + mov \$0,%ecx # query L1D
|
| + cpuid
|
| + mov %eax,%r10d
|
| + shr \$14,%r10d
|
| + and \$0xfff,%r10d # number of cores -1 per L1D
|
| +
|
| +.Lnocacheinfo:
|
| + mov \$1,%eax
|
| + cpuid
|
| cmp \$0,%r9d
|
| jne .Lnotintel
|
| or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR
|
| @@ -144,6 +121,11 @@
|
| .Lnotintel:
|
| bt \$28,%edx # test hyper-threading bit
|
| jnc .Ldone
|
| + and \$0xefffffff,%edx # ~(1<<28)
|
| + cmp \$0,%r10d
|
| + je .Ldone
|
| +
|
| + or \$0x10000000,%edx # 1<<28
|
| shr \$16,%ebx
|
| cmp \$1,%bl # see if cache is shared
|
| ja .Ldone
|
| @@ -155,5 +137,96 @@
|
| or %rcx,%rax
|
| ret
|
| .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
| +
|
| +.globl OPENSSL_cleanse
|
| +.type OPENSSL_cleanse,\@abi-omnipotent
|
| +.align 16
|
| +OPENSSL_cleanse:
|
| + xor %rax,%rax # rax = 0, the value written over the buffer
|
| + cmp \$15,$arg2 # second argument is the byte count
|
| + jae .Lot # 15 bytes or more: align the pointer, then store 8 bytes at a time
|
| + cmp \$0,$arg2
|
| + je .Lret # zero count: nothing to clear
|
| +.Little: # byte-at-a-time loop
|
| + mov %al,($arg1)
|
| + sub \$1,$arg2
|
| + lea 1($arg1),$arg1 # lea advances the pointer without disturbing the flags set by sub
|
| + jnz .Little
|
| +.Lret:
|
| + ret
|
| +.align 16
|
| +.Lot:
|
| + test \$7,$arg1 # is the pointer 8-byte aligned yet
|
| + jz .Laligned
|
| + mov %al,($arg1) # not aligned: clear one byte and test again
|
| + lea -1($arg2),$arg2
|
| + lea 1($arg1),$arg1
|
| + jmp .Lot
|
| +.Laligned:
|
| + mov %rax,($arg1) # clear 8 bytes per iteration
|
| + lea -8($arg2),$arg2
|
| + test \$-8,$arg2 # non-zero while 8 or more bytes remain
|
| + lea 8($arg1),$arg1 # lea keeps the flags from test intact for the branch below
|
| + jnz .Laligned
|
| + cmp \$0,$arg2
|
| + jne .Little # 1 to 7 trailing bytes go through the byte loop
|
| + ret
|
| +.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
| ___
|
| +
|
| +print<<___ if (!$win64); # non-Win64 variant: zeroes xmm0-xmm15 and rcx, rdx, rsi, rdi, r8-r11
|
| +.globl OPENSSL_wipe_cpu
|
| +.type OPENSSL_wipe_cpu,\@abi-omnipotent
|
| +.align 16
|
| +OPENSSL_wipe_cpu:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + pxor %xmm6,%xmm6
|
| + pxor %xmm7,%xmm7
|
| + pxor %xmm8,%xmm8
|
| + pxor %xmm9,%xmm9
|
| + pxor %xmm10,%xmm10
|
| + pxor %xmm11,%xmm11
|
| + pxor %xmm12,%xmm12
|
| + pxor %xmm13,%xmm13
|
| + pxor %xmm14,%xmm14
|
| + pxor %xmm15,%xmm15
|
| + xorq %rcx,%rcx
|
| + xorq %rdx,%rdx
|
| + xorq %rsi,%rsi
|
| + xorq %rdi,%rdi
|
| + xorq %r8,%r8
|
| + xorq %r9,%r9
|
| + xorq %r10,%r10
|
| + xorq %r11,%r11
|
| + leaq 8(%rsp),%rax # return value: rsp + 8, the address just above the return address slot
|
| + ret
|
| +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
| +___
|
| +print<<___ if ($win64); # Win64 variant: zeroes only xmm0-xmm5 and rcx, rdx, r8-r11; xmm6-xmm15, rsi and rdi are callee-saved under the Win64 calling convention and are left alone
|
| +.globl OPENSSL_wipe_cpu
|
| +.type OPENSSL_wipe_cpu,\@abi-omnipotent
|
| +.align 16
|
| +OPENSSL_wipe_cpu:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + xorq %rcx,%rcx
|
| + xorq %rdx,%rdx
|
| + xorq %r8,%r8
|
| + xorq %r9,%r9
|
| + xorq %r10,%r10
|
| + xorq %r11,%r11
|
| + leaq 8(%rsp),%rax # return value: rsp + 8, the address just above the return address slot
|
| + ret
|
| +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
| +___
|
| +
|
| close STDOUT; # flush
|
|
|