OLD | NEW |
1 #!/usr/bin/env perl | 1 #!/usr/bin/env perl |
2 | 2 |
3 push(@INC,"perlasm"); | 3 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 4 push(@INC, "${dir}perlasm", "perlasm"); |
4 require "x86asm.pl"; | 5 require "x86asm.pl"; |
5 | 6 |
6 &asm_init($ARGV[0],"x86cpuid"); | 7 &asm_init($ARGV[0],"x86cpuid"); |
7 | 8 |
8 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | 9 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } |
9 | 10 |
10 &function_begin("OPENSSL_ia32_cpuid"); | 11 &function_begin("OPENSSL_ia32_cpuid"); |
11 &xor ("edx","edx"); | 12 &xor ("edx","edx"); |
12 &pushf (); | 13 &pushf (); |
13 &pop ("eax"); | 14 &pop ("eax"); |
14 &mov ("ecx","eax"); | 15 &mov ("ecx","eax"); |
15 &xor ("eax",1<<21); | 16 &xor ("eax",1<<21); |
16 &push ("eax"); | 17 &push ("eax"); |
17 &popf (); | 18 &popf (); |
18 &pushf (); | 19 &pushf (); |
19 &pop ("eax"); | 20 &pop ("eax"); |
20 &xor ("ecx","eax"); | 21 &xor ("ecx","eax"); |
21 &bt ("ecx",21); | 22 &bt ("ecx",21); |
22 &jnc (&label("done")); | 23 &jnc (&label("done")); |
23 &xor ("eax","eax"); | 24 &xor ("eax","eax"); |
24 &cpuid (); | 25 &cpuid (); |
| 26 &mov ("edi","eax"); # max value for standard query level |
| 27 |
25 &xor ("eax","eax"); | 28 &xor ("eax","eax"); |
26 &cmp ("ebx",0x756e6547); # "Genu" | 29 &cmp ("ebx",0x756e6547); # "Genu" |
27 » &data_byte(0x0f,0x95,0xc0);» #&setne»(&LB("eax")); | 30 » &setne» (&LB("eax")); |
28 &mov ("ebp","eax"); | 31 &mov ("ebp","eax"); |
29 &cmp ("edx",0x49656e69); # "ineI" | 32 &cmp ("edx",0x49656e69); # "ineI" |
30 » &data_byte(0x0f,0x95,0xc0);» #&setne»(&LB("eax")); | 33 » &setne» (&LB("eax")); |
31 &or ("ebp","eax"); | 34 &or ("ebp","eax"); |
32 &cmp ("ecx",0x6c65746e); # "ntel" | 35 &cmp ("ecx",0x6c65746e); # "ntel" |
33 » &data_byte(0x0f,0x95,0xc0);» #&setne»(&LB("eax")); | 36 » &setne» (&LB("eax")); |
34 » &or» ("ebp","eax"); | 37 » &or» ("ebp","eax");» » # 0 indicates Intel CPU |
| 38 » &jz» (&label("intel")); |
| 39 |
| 40 » &cmp» ("ebx",0x68747541);» # "Auth" |
| 41 » &setne» (&LB("eax")); |
| 42 » &mov» ("esi","eax"); |
| 43 » &cmp» ("edx",0x69746E65);» # "enti" |
| 44 » &setne» (&LB("eax")); |
| 45 » &or» ("esi","eax"); |
| 46 » &cmp» ("ecx",0x444D4163);» # "cAMD" |
| 47 » &setne» (&LB("eax")); |
| 48 » &or» ("esi","eax");» » # 0 indicates AMD CPU |
| 49 » &jnz» (&label("intel")); |
| 50 |
| 51 » # AMD specific |
| 52 » &mov» ("eax",0x80000000); |
| 53 » &cpuid» (); |
| 54 » &cmp» ("eax",0x80000008); |
| 55 » &jb» (&label("intel")); |
| 56 |
| 57 » &mov» ("eax",0x80000008); |
| 58 » &cpuid» (); |
| 59 » &movz» ("esi",&LB("ecx"));» # number of cores - 1 |
| 60 » &inc» ("esi");» » # number of cores |
| 61 |
| 62 » &mov» ("eax",1); |
| 63 » &cpuid» (); |
| 64 » &bt» ("edx",28); |
| 65 » &jnc» (&label("done")); |
| 66 » &shr» ("ebx",16); |
| 67 » &and» ("ebx",0xff); |
| 68 » &cmp» ("ebx","esi"); |
| 69 » &ja» (&label("done")); |
| 70 » &and» ("edx",0xefffffff);» # clear hyper-threading bit |
| 71 » &jmp» (&label("done")); |
| 72 » |
| 73 &set_label("intel"); |
| 74 » &cmp» ("edi",4); |
| 75 » &mov» ("edi",-1); |
| 76 » &jb» (&label("nocacheinfo")); |
| 77 |
| 78 » &mov» ("eax",4); |
| 79 » &mov» ("ecx",0);» » # query L1D |
| 80 » &cpuid» (); |
| 81 » &mov» ("edi","eax"); |
| 82 » &shr» ("edi",14); |
| 83 » &and» ("edi",0xfff);» » # number of cores - 1 per L1D |
| 84 |
| 85 &set_label("nocacheinfo"); |
35 &mov ("eax",1); | 86 &mov ("eax",1); |
36 &cpuid (); | 87 &cpuid (); |
37 &cmp ("ebp",0); | 88 &cmp ("ebp",0); |
38 &jne (&label("notP4")); | 89 &jne (&label("notP4")); |
39 » &and» ("eax",15<<8);» » # family ID | 90 » &and» (&HB("eax"),15);» # family ID |
40 » &cmp» ("eax",15<<8);» » # P4? | 91 » &cmp» (&HB("eax"),15);» # P4? |
41 &jne (&label("notP4")); | 92 &jne (&label("notP4")); |
42 &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR | 93 &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR |
43 &set_label("notP4"); | 94 &set_label("notP4"); |
44 &bt ("edx",28); # test hyper-threading bit | 95 &bt ("edx",28); # test hyper-threading bit |
45 &jnc (&label("done")); | 96 &jnc (&label("done")); |
| 97 &and ("edx",0xefffffff); |
| 98 &cmp ("edi",0); |
| 99 &je (&label("done")); |
| 100 |
| 101 &or ("edx",0x10000000); |
46 &shr ("ebx",16); | 102 &shr ("ebx",16); |
47 » &and» ("ebx",0xff); | 103 » &cmp» (&LB("ebx"),1); |
48 » &cmp» ("ebx",1);» » # see if cache is shared(*) | |
49 &ja (&label("done")); | 104 &ja (&label("done")); |
50 &and ("edx",0xefffffff); # clear hyper-threading bit if not | 105 &and ("edx",0xefffffff); # clear hyper-threading bit if not |
51 &set_label("done"); | 106 &set_label("done"); |
52 &mov ("eax","edx"); | 107 &mov ("eax","edx"); |
53 &mov ("edx","ecx"); | 108 &mov ("edx","ecx"); |
54 &function_end("OPENSSL_ia32_cpuid"); | 109 &function_end("OPENSSL_ia32_cpuid"); |
55 # (*) on Core2 this value is set to 2 denoting the fact that L2 | |
56 # cache is shared between cores. | |
57 | 110 |
58 &external_label("OPENSSL_ia32cap_P"); | 111 &external_label("OPENSSL_ia32cap_P"); |
59 | 112 |
60 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); | 113 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); |
61 &xor ("eax","eax"); | 114 &xor ("eax","eax"); |
62 &xor ("edx","edx"); | 115 &xor ("edx","edx"); |
63 &picmeup("ecx","OPENSSL_ia32cap_P"); | 116 &picmeup("ecx","OPENSSL_ia32cap_P"); |
64 &bt (&DWP(0,"ecx"),4); | 117 &bt (&DWP(0,"ecx"),4); |
65 &jnc (&label("notsc")); | 118 &jnc (&label("notsc")); |
66 &rdtsc (); | 119 &rdtsc (); |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
213 } | 266 } |
214 &call_ptr (&DWP(8,"ebp"));# make the call... | 267 &call_ptr (&DWP(8,"ebp"));# make the call... |
215 &mov ("esp","ebp"); # ... and just restore the stack pointer | 268 &mov ("esp","ebp"); # ... and just restore the stack pointer |
216 # without paying attention to what we called, | 269 # without paying attention to what we called, |
217 # (__cdecl *func) or (__stdcall *one). | 270 # (__cdecl *func) or (__stdcall *one). |
218 &pop ("ebp"); | 271 &pop ("ebp"); |
219 &ret (); | 272 &ret (); |
220 } | 273 } |
221 &function_end_B("OPENSSL_indirect_call"); | 274 &function_end_B("OPENSSL_indirect_call"); |
222 | 275 |
| 276 &function_begin_B("OPENSSL_cleanse"); |
| 277 &mov ("edx",&wparam(0)); |
| 278 &mov ("ecx",&wparam(1)); |
| 279 &xor ("eax","eax"); |
| 280 &cmp ("ecx",7); |
| 281 &jae (&label("lot")); |
| 282 &cmp ("ecx",0); |
| 283 &je (&label("ret")); |
| 284 &set_label("little"); |
| 285 &mov (&BP(0,"edx"),"al"); |
| 286 &sub ("ecx",1); |
| 287 &lea ("edx",&DWP(1,"edx")); |
| 288 &jnz (&label("little")); |
| 289 &set_label("ret"); |
| 290 &ret (); |
| 291 |
| 292 &set_label("lot",16); |
| 293 &test ("edx",3); |
| 294 &jz (&label("aligned")); |
| 295 &mov (&BP(0,"edx"),"al"); |
| 296 &lea ("ecx",&DWP(-1,"ecx")); |
| 297 &lea ("edx",&DWP(1,"edx")); |
| 298 &jmp (&label("lot")); |
| 299 &set_label("aligned"); |
| 300 &mov (&DWP(0,"edx"),"eax"); |
| 301 &lea ("ecx",&DWP(-4,"ecx")); |
| 302 &test ("ecx",-4); |
| 303 &lea ("edx",&DWP(4,"edx")); |
| 304 &jnz (&label("aligned")); |
| 305 &cmp ("ecx",0); |
| 306 &jne (&label("little")); |
| 307 &ret (); |
| 308 &function_end_B("OPENSSL_cleanse"); |
| 309 |
223 &initseg("OPENSSL_cpuid_setup"); | 310 &initseg("OPENSSL_cpuid_setup"); |
224 | 311 |
225 &asm_finish(); | 312 &asm_finish(); |
OLD | NEW |