Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(160)

Side by Side Diff: base/cpu.cc

Issue 1380943002: Add AVX2 detection (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: fix typo in xgetbv Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/cpu.h" 5 #include "base/cpu.h"
6 6
7 #include <stdlib.h> 7 #include <stdlib.h>
8 #include <string.h> 8 #include <string.h>
9 9
10 #include <algorithm> 10 #include <algorithm>
(...skipping 26 matching lines...) Expand all
37 ext_family_(0), 37 ext_family_(0),
38 has_mmx_(false), 38 has_mmx_(false),
39 has_sse_(false), 39 has_sse_(false),
40 has_sse2_(false), 40 has_sse2_(false),
41 has_sse3_(false), 41 has_sse3_(false),
42 has_ssse3_(false), 42 has_ssse3_(false),
43 has_sse41_(false), 43 has_sse41_(false),
44 has_sse42_(false), 44 has_sse42_(false),
45 has_avx_(false), 45 has_avx_(false),
46 has_avx_hardware_(false), 46 has_avx_hardware_(false),
47 has_avx2_(false),
47 has_aesni_(false), 48 has_aesni_(false),
48 has_non_stop_time_stamp_counter_(false), 49 has_non_stop_time_stamp_counter_(false),
49 has_broken_neon_(false), 50 has_broken_neon_(false),
50 cpu_vendor_("unknown") { 51 cpu_vendor_("unknown") {
51 Initialize(); 52 Initialize();
52 } 53 }
53 54
54 namespace { 55 namespace {
55 56
56 #if defined(ARCH_CPU_X86_FAMILY) 57 #if defined(ARCH_CPU_X86_FAMILY)
57 #ifndef _MSC_VER 58 #ifndef _MSC_VER
58 59
59 #if defined(__pic__) && defined(__i386__) 60 #if defined(__pic__) && defined(__i386__)
60 61
61 void __cpuid(int cpu_info[4], int info_type) { 62 void __cpuid(int cpu_info[4], int info_type) {
62 __asm__ volatile ( 63 __asm__ volatile (
63 "mov %%ebx, %%edi\n" 64 "mov %%ebx, %%edi\n"
64 "cpuid\n" 65 "cpuid\n"
65 "xchg %%edi, %%ebx\n" 66 "xchg %%edi, %%ebx\n"
66 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 67 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
67 : "a"(info_type) 68 : "a"(info_type)
68 ); 69 );
69 } 70 }
70 71
71 #else 72 #else
72 73
73 void __cpuid(int cpu_info[4], int info_type) { 74 void __cpuid(int cpu_info[4], int info_type) {
74 __asm__ volatile ( 75 __asm__ volatile (
75 "cpuid \n\t" 76 "cpuid\n"
76 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 77 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
77 : "a"(info_type) 78 : "a"(info_type)
78 ); 79 );
79 } 80 }
80 81
81 #endif 82 #endif
82 83
83 // _xgetbv returns the value of an Intel Extended Control Register (XCR). 84 // _xgetbv returns the value of an Intel Extended Control Register (XCR).
84 // Currently only XCR0 is defined by Intel so |xcr| should always be zero. 85 // Currently only XCR0 is defined by Intel so |xcr| should always be zero.
85 uint64 _xgetbv(uint32 xcr) { 86 uint64 _xgetbv(uint32 xcr) {
86 uint32 eax, edx; 87 uint32 eax, edx;
87 88
88 __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr)); 89 __asm__ volatile (
90 "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
89 return (static_cast<uint64>(edx) << 32) | eax; 91 return (static_cast<uint64>(edx) << 32) | eax;
90 } 92 }
91 93
92 #endif // !_MSC_VER 94 #endif // !_MSC_VER
93 #endif // ARCH_CPU_X86_FAMILY 95 #endif // ARCH_CPU_X86_FAMILY
94 96
95 #if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX)) 97 #if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
96 class LazyCpuInfoValue { 98 class LazyCpuInfoValue {
97 public: 99 public:
98 LazyCpuInfoValue() : has_broken_neon_(false) { 100 LazyCpuInfoValue() : has_broken_neon_(false) {
99 // This function finds the value from /proc/cpuinfo under the key "model 101 // This function finds the value from /proc/cpuinfo under the key "model
100 // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7 102 // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7
101 // and later for arm64) and is shown once per CPU. "Processor" is used in 103 // and later for arm64) and is shown once per CPU. "Processor" is used in
102 // earlier versions and is shown only once at the top of /proc/cpuinfo 104 // earlier versions and is shown only once at the top of /proc/cpuinfo
103 // regardless of the number of CPUs. 105 // regardless of the number of CPUs.
104 const char kModelNamePrefix[] = "model name\t: "; 106 const char kModelNamePrefix[] = "model name\t: ";
105 const char kProcessorPrefix[] = "Processor\t: "; 107 const char kProcessorPrefix[] = "Processor\t: ";
106 108
107 // This function also calculates whether we believe that this CPU has a 109 // This function also calculates whether we believe that this CPU has a
108 // broken NEON unit based on these fields from cpuinfo: 110 // broken NEON unit based on these fields from cpuinfo:
109 unsigned implementer = 0, architecture = 0, variant = 0, part = 0, 111 unsigned implementer = 0, architecture = 0, variant = 0, part = 0,
110 revision = 0; 112 revision = 0;
111 const struct { 113 const struct {
112 const char key[17]; 114 const char key[17];
113 unsigned *result; 115 unsigned int* result;
114 } kUnsignedValues[] = { 116 } kUnsignedValues[] = {
115 {"CPU implementer", &implementer}, 117 {"CPU implementer", &implementer},
116 {"CPU architecture", &architecture}, 118 {"CPU architecture", &architecture},
117 {"CPU variant", &variant}, 119 {"CPU variant", &variant},
118 {"CPU part", &part}, 120 {"CPU part", &part},
119 {"CPU revision", &revision}, 121 {"CPU revision", &revision},
120 }; 122 };
121 123
122 std::string contents; 124 std::string contents;
123 ReadFileToString(FilePath("/proc/cpuinfo"), &contents); 125 ReadFileToString(FilePath("/proc/cpuinfo"), &contents);
(...skipping 25 matching lines...) Expand all
149 151
150 const StringPiece line_sp(line); 152 const StringPiece line_sp(line);
151 StringPiece value_sp = line_sp.substr(colon_pos + 1); 153 StringPiece value_sp = line_sp.substr(colon_pos + 1);
152 while (!value_sp.empty() && 154 while (!value_sp.empty() &&
153 (value_sp[0] == ' ' || value_sp[0] == '\t')) { 155 (value_sp[0] == ' ' || value_sp[0] == '\t')) {
154 value_sp = value_sp.substr(1); 156 value_sp = value_sp.substr(1);
155 } 157 }
156 158
157 // The string may have leading "0x" or not, so we use strtoul to 159 // The string may have leading "0x" or not, so we use strtoul to
158 // handle that. 160 // handle that.
159 char *endptr; 161 char* endptr;
160 std::string value(value_sp.as_string()); 162 std::string value(value_sp.as_string());
161 unsigned long int result = strtoul(value.c_str(), &endptr, 0); 163 unsigned long int result = strtoul(value.c_str(), &endptr, 0);
162 if (*endptr == 0 && result <= UINT_MAX) { 164 if (*endptr == 0 && result <= UINT_MAX) {
163 *kUnsignedValues[i].result = result; 165 *kUnsignedValues[i].result = result;
164 } 166 }
165 } 167 }
166 } 168 }
167 } 169 }
168 170
169 has_broken_neon_ = 171 has_broken_neon_ =
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped 206 // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
205 // before using memcpy to copy these three array elements to cpu_string. 207 // before using memcpy to copy these three array elements to cpu_string.
206 __cpuid(cpu_info, 0); 208 __cpuid(cpu_info, 0);
207 int num_ids = cpu_info[0]; 209 int num_ids = cpu_info[0];
208 std::swap(cpu_info[2], cpu_info[3]); 210 std::swap(cpu_info[2], cpu_info[3]);
209 memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1])); 211 memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
210 cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1])); 212 cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1]));
211 213
212 // Interpret CPU feature information. 214 // Interpret CPU feature information.
213 if (num_ids > 0) { 215 if (num_ids > 0) {
216 int cpu_info7[4] = {0};
214 __cpuid(cpu_info, 1); 217 __cpuid(cpu_info, 1);
218 if (num_ids >= 7) {
219 __cpuid(cpu_info7, 7);
220 }
215 signature_ = cpu_info[0]; 221 signature_ = cpu_info[0];
216 stepping_ = cpu_info[0] & 0xf; 222 stepping_ = cpu_info[0] & 0xf;
217 model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0); 223 model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0);
218 family_ = (cpu_info[0] >> 8) & 0xf; 224 family_ = (cpu_info[0] >> 8) & 0xf;
219 type_ = (cpu_info[0] >> 12) & 0x3; 225 type_ = (cpu_info[0] >> 12) & 0x3;
220 ext_model_ = (cpu_info[0] >> 16) & 0xf; 226 ext_model_ = (cpu_info[0] >> 16) & 0xf;
221 ext_family_ = (cpu_info[0] >> 20) & 0xff; 227 ext_family_ = (cpu_info[0] >> 20) & 0xff;
222 has_mmx_ = (cpu_info[3] & 0x00800000) != 0; 228 has_mmx_ = (cpu_info[3] & 0x00800000) != 0;
223 has_sse_ = (cpu_info[3] & 0x02000000) != 0; 229 has_sse_ = (cpu_info[3] & 0x02000000) != 0;
224 has_sse2_ = (cpu_info[3] & 0x04000000) != 0; 230 has_sse2_ = (cpu_info[3] & 0x04000000) != 0;
(...skipping 12 matching lines...) Expand all
237 // In addition, we have observed some crashes with the xgetbv instruction 243 // In addition, we have observed some crashes with the xgetbv instruction
238 // even after following Intel's example code. (See crbug.com/375968.) 244 // even after following Intel's example code. (See crbug.com/375968.)
239 // Because of that, we also test the XSAVE bit because its description in 245 // Because of that, we also test the XSAVE bit because its description in
240 // the CPUID documentation suggests that it signals xgetbv support. 246 // the CPUID documentation suggests that it signals xgetbv support.
241 has_avx_ = 247 has_avx_ =
242 has_avx_hardware_ && 248 has_avx_hardware_ &&
243 (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ && 249 (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
244 (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ && 250 (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
245 (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; 251 (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
246 has_aesni_ = (cpu_info[2] & 0x02000000) != 0; 252 has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
253 has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
247 } 254 }
248 255
249 // Get the brand string of the cpu. 256 // Get the brand string of the cpu.
250 __cpuid(cpu_info, 0x80000000); 257 __cpuid(cpu_info, 0x80000000);
251 const int parameter_end = 0x80000004; 258 const int parameter_end = 0x80000004;
252 int max_parameter = cpu_info[0]; 259 int max_parameter = cpu_info[0];
253 260
254 if (cpu_info[0] >= parameter_end) { 261 if (cpu_info[0] >= parameter_end) {
255 char* cpu_string_ptr = cpu_string; 262 char* cpu_string_ptr = cpu_string;
256 263
(...skipping 11 matching lines...) Expand all
268 __cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter); 275 __cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter);
269 has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0; 276 has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
270 } 277 }
271 #elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX)) 278 #elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
272 cpu_brand_.assign(g_lazy_cpuinfo.Get().brand()); 279 cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
273 has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon(); 280 has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
274 #endif 281 #endif
275 } 282 }
276 283
277 CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const { 284 CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
285 if (has_avx2()) return AVX2;
278 if (has_avx()) return AVX; 286 if (has_avx()) return AVX;
279 if (has_sse42()) return SSE42; 287 if (has_sse42()) return SSE42;
280 if (has_sse41()) return SSE41; 288 if (has_sse41()) return SSE41;
281 if (has_ssse3()) return SSSE3; 289 if (has_ssse3()) return SSSE3;
282 if (has_sse3()) return SSE3; 290 if (has_sse3()) return SSE3;
283 if (has_sse2()) return SSE2; 291 if (has_sse2()) return SSE2;
284 if (has_sse()) return SSE; 292 if (has_sse()) return SSE;
285 return PENTIUM; 293 return PENTIUM;
286 } 294 }
287 295
288 } // namespace base 296 } // namespace base
OLDNEW
« no previous file with comments | « base/cpu.h ('k') | base/cpu_unittest.cc » ('j') | base/cpu_unittest.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698