OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 Google Inc. All rights reserved. |
| 2 // |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 // you may not use this file except in compliance with the License. |
| 5 // You may obtain a copy of the License at |
| 6 // |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 // |
| 9 // Unless required by applicable law or agreed to in writing, software |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 // See the License for the specific language governing permissions and |
| 13 // limitations under the License. |
| 14 |
| 15 #include "sysinfo.h" |
| 16 #include "internal_macros.h" |
| 17 |
| 18 #ifdef BENCHMARK_OS_WINDOWS |
| 19 #include <Shlwapi.h> |
| 20 #include <VersionHelpers.h> |
| 21 #include <Windows.h> |
| 22 #else |
| 23 #include <fcntl.h> |
| 24 #include <sys/resource.h> |
| 25 #include <sys/time.h> |
| 26 #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to
avoid compilation error on FreeBSD |
| 27 #include <unistd.h> |
| 28 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX |
| 29 #include <sys/sysctl.h> |
| 30 #endif |
| 31 #endif |
| 32 |
| 33 #include <cerrno> |
| 34 #include <cstdint> |
| 35 #include <cstdio> |
| 36 #include <cstdlib> |
| 37 #include <cstring> |
| 38 #include <iostream> |
| 39 #include <limits> |
| 40 #include <mutex> |
| 41 |
| 42 #include "arraysize.h" |
| 43 #include "check.h" |
| 44 #include "cycleclock.h" |
| 45 #include "internal_macros.h" |
| 46 #include "log.h" |
| 47 #include "sleep.h" |
| 48 #include "string_util.h" |
| 49 |
| 50 namespace benchmark { |
| 51 namespace { |
| 52 std::once_flag cpuinfo_init; |
| 53 double cpuinfo_cycles_per_second = 1.0; |
| 54 int cpuinfo_num_cpus = 1; // Conservative guess |
| 55 |
| 56 #if !defined BENCHMARK_OS_MACOSX |
| 57 const int64_t estimate_time_ms = 1000; |
| 58 |
| 59 // Helper function estimates cycles/sec by observing cycles elapsed during |
| 60 // sleep(). Using small sleep time decreases accuracy significantly. |
| 61 int64_t EstimateCyclesPerSecond() { |
| 62 const int64_t start_ticks = cycleclock::Now(); |
| 63 SleepForMilliseconds(estimate_time_ms); |
| 64 return cycleclock::Now() - start_ticks; |
| 65 } |
| 66 #endif |
| 67 |
| 68 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
// Reads a base-10 integer from the beginning of `file`.
//
// At most sizeof(line) - 1 bytes are fetched with a single read() call. The
// parse is accepted only when at least one digit was consumed, the number
// fits in a long, and the number is followed directly by a newline or by the
// end of the data that was read.
//
// Returns true and stores the parsed number in *value on success. Returns
// false and leaves *value untouched when the file cannot be opened or read,
// is empty, or does not contain a well-formed number.
bool ReadIntFromFile(const char* file, long* value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1) return false;

  char line[1024];
  const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
  close(fd);
  // A failed or empty read is reported to the caller rather than aborting, so
  // that the caller can fall back to another source of information.
  if (bytes_read <= 0) return false;
  line[bytes_read] = '\0';

  char* end = nullptr;
  errno = 0;
  const long parsed = strtol(line, &end, 10);
  // end == line means strtol consumed no digits at all (e.g. the file only
  // holds a newline); ERANGE means the number does not fit in a long.
  if (end == line || errno == ERANGE) return false;
  if (*end != '\n' && *end != '\0') return false;

  *value = parsed;
  return true;
}
| 90 #endif |
| 91 |
| 92 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
// Returns a copy of `s` in which every character has been passed through
// std::tolower.
static std::string convertToLowerCase(std::string s) {
  for (auto& ch : s) {
    // std::tolower is only defined for EOF and values representable as
    // unsigned char, so a possibly-negative char must be converted first.
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}
| 98 static bool startsWithKey(std::string Value, std::string Key, |
| 99 bool IgnoreCase = true) { |
| 100 if (IgnoreCase) { |
| 101 Key = convertToLowerCase(std::move(Key)); |
| 102 Value = convertToLowerCase(std::move(Value)); |
| 103 } |
| 104 return Value.compare(0, Key.size(), Key) == 0; |
| 105 } |
| 106 #endif |
| 107 |
| 108 void InitializeSystemInfo() { |
| 109 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
| 110 char line[1024]; |
| 111 char* err; |
| 112 long freq; |
| 113 |
| 114 bool saw_mhz = false; |
| 115 |
| 116 // If the kernel is exporting the tsc frequency use that. There are issues |
| 117 // where cpuinfo_max_freq cannot be relied on because the BIOS may be |
| 118 // exporintg an invalid p-state (on x86) or p-states may be used to put the |
| 119 // processor in a new mode (turbo mode). Essentially, those frequencies |
| 120 // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as |
| 121 // well. |
| 122 if (!saw_mhz && |
| 123 ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { |
| 124 // The value is in kHz (as the file name suggests). For example, on a |
| 125 // 2GHz warpstation, the file contains the value "2000000". |
| 126 cpuinfo_cycles_per_second = freq * 1000.0; |
| 127 saw_mhz = true; |
| 128 } |
| 129 |
| 130 // If CPU scaling is in effect, we want to use the *maximum* frequency, |
| 131 // not whatever CPU speed some random processor happens to be using now. |
| 132 if (!saw_mhz && |
| 133 ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", |
| 134 &freq)) { |
| 135 // The value is in kHz. For example, on a 2GHz warpstation, the file |
| 136 // contains the value "2000000". |
| 137 cpuinfo_cycles_per_second = freq * 1000.0; |
| 138 saw_mhz = true; |
| 139 } |
| 140 |
| 141 // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. |
| 142 const char* pname = "/proc/cpuinfo"; |
| 143 int fd = open(pname, O_RDONLY); |
| 144 if (fd == -1) { |
| 145 perror(pname); |
| 146 if (!saw_mhz) { |
| 147 cpuinfo_cycles_per_second = |
| 148 static_cast<double>(EstimateCyclesPerSecond()); |
| 149 } |
| 150 return; |
| 151 } |
| 152 |
| 153 double bogo_clock = 1.0; |
| 154 bool saw_bogo = false; |
| 155 long max_cpu_id = 0; |
| 156 int num_cpus = 0; |
| 157 line[0] = line[1] = '\0'; |
| 158 size_t chars_read = 0; |
| 159 do { // we'll exit when the last read didn't read anything |
| 160 // Move the next line to the beginning of the buffer |
| 161 const size_t oldlinelen = strlen(line); |
| 162 if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line |
| 163 line[0] = '\0'; |
| 164 else // still other lines left to save |
| 165 memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); |
| 166 // Terminate the new line, reading more if we can't find the newline |
| 167 char* newline = strchr(line, '\n'); |
| 168 if (newline == nullptr) { |
| 169 const size_t linelen = strlen(line); |
| 170 const size_t bytes_to_read = sizeof(line) - 1 - linelen; |
| 171 CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes |
| 172 chars_read = read(fd, line + linelen, bytes_to_read); |
| 173 line[linelen + chars_read] = '\0'; |
| 174 newline = strchr(line, '\n'); |
| 175 } |
| 176 if (newline != nullptr) *newline = '\0'; |
| 177 |
| 178 // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only |
| 179 // accept postive values. Some environments (virtual machines) report zero, |
| 180 // which would cause infinite looping in WallTime_Init. |
| 181 if (!saw_mhz && startsWithKey(line, "cpu MHz")) { |
| 182 const char* freqstr = strchr(line, ':'); |
| 183 if (freqstr) { |
| 184 cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; |
| 185 if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) |
| 186 saw_mhz = true; |
| 187 } |
| 188 } else if (startsWithKey(line, "bogomips")) { |
| 189 const char* freqstr = strchr(line, ':'); |
| 190 if (freqstr) { |
| 191 bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; |
| 192 if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) |
| 193 saw_bogo = true; |
| 194 } |
| 195 } else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) { |
| 196 // The above comparison is case-sensitive because ARM kernels often |
| 197 // include a "Processor" line that tells you about the CPU, distinct |
| 198 // from the usual "processor" lines that give you CPU ids. No current |
| 199 // Linux architecture is using "Processor" for CPU ids. |
| 200 num_cpus++; // count up every time we see an "processor :" entry |
| 201 const char* id_str = strchr(line, ':'); |
| 202 if (id_str) { |
| 203 const long cpu_id = strtol(id_str + 1, &err, 10); |
| 204 if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) |
| 205 max_cpu_id = cpu_id; |
| 206 } |
| 207 } |
| 208 } while (chars_read > 0); |
| 209 close(fd); |
| 210 |
| 211 if (!saw_mhz) { |
| 212 if (saw_bogo) { |
| 213 // If we didn't find anything better, we'll use bogomips, but |
| 214 // we're not happy about it. |
| 215 cpuinfo_cycles_per_second = bogo_clock; |
| 216 } else { |
| 217 // If we don't even have bogomips, we'll use the slow estimation. |
| 218 cpuinfo_cycles_per_second = |
| 219 static_cast<double>(EstimateCyclesPerSecond()); |
| 220 } |
| 221 } |
| 222 if (num_cpus == 0) { |
| 223 fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n"); |
| 224 } else { |
| 225 if ((max_cpu_id + 1) != num_cpus) { |
| 226 fprintf(stderr, |
| 227 "CPU ID assignments in /proc/cpuinfo seem messed up." |
| 228 " This is usually caused by a bad BIOS.\n"); |
| 229 } |
| 230 cpuinfo_num_cpus = num_cpus; |
| 231 } |
| 232 |
| 233 #elif defined BENCHMARK_OS_FREEBSD |
| 234 // For this sysctl to work, the machine must be configured without |
| 235 // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 |
| 236 // and later. Before that, it's a 32-bit quantity (and gives the |
| 237 // wrong answer on machines faster than 2^32 Hz). See |
| 238 // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html |
| 239 // But also compare FreeBSD 7.0: |
| 240 // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 |
| 241 // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); |
| 242 // To FreeBSD 6.3 (it's the same in 6-STABLE): |
| 243 // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 |
| 244 // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); |
| 245 #if __FreeBSD__ >= 7 |
| 246 uint64_t hz = 0; |
| 247 #else |
| 248 unsigned int hz = 0; |
| 249 #endif |
| 250 size_t sz = sizeof(hz); |
| 251 const char* sysctl_path = "machdep.tsc_freq"; |
| 252 if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { |
| 253 fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", |
| 254 sysctl_path, strerror(errno)); |
| 255 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
| 256 } else { |
| 257 cpuinfo_cycles_per_second = hz; |
| 258 } |
| 259 // TODO: also figure out cpuinfo_num_cpus |
| 260 |
| 261 #elif defined BENCHMARK_OS_WINDOWS |
| 262 // In NT, read MHz from the registry. If we fail to do so or we're in win9x |
| 263 // then make a crude estimate. |
| 264 DWORD data, data_size = sizeof(data); |
| 265 if (IsWindowsXPOrGreater() && |
| 266 SUCCEEDED( |
| 267 SHGetValueA(HKEY_LOCAL_MACHINE, |
| 268 "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", |
| 269 "~MHz", nullptr, &data, &data_size))) |
| 270 cpuinfo_cycles_per_second = |
| 271 static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz |
| 272 else |
| 273 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
| 274 |
| 275 SYSTEM_INFO sysinfo; |
| 276 // Use memset as opposed to = {} to avoid GCC missing initializer false |
| 277 // positives. |
| 278 std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); |
| 279 GetSystemInfo(&sysinfo); |
| 280 cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical |
| 281 // processors in the current |
| 282 // group |
| 283 |
| 284 #elif defined BENCHMARK_OS_MACOSX |
| 285 int32_t num_cpus = 0; |
| 286 size_t size = sizeof(num_cpus); |
| 287 if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 && |
| 288 (size == sizeof(num_cpus))) { |
| 289 cpuinfo_num_cpus = num_cpus; |
| 290 } else { |
| 291 fprintf(stderr, "%s\n", strerror(errno)); |
| 292 std::exit(EXIT_FAILURE); |
| 293 } |
| 294 int64_t cpu_freq = 0; |
| 295 size = sizeof(cpu_freq); |
| 296 if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 && |
| 297 (size == sizeof(cpu_freq))) { |
| 298 cpuinfo_cycles_per_second = cpu_freq; |
| 299 } else { |
| 300 #if defined BENCHMARK_OS_IOS |
| 301 fprintf(stderr, "CPU frequency cannot be detected. \n"); |
| 302 cpuinfo_cycles_per_second = 0; |
| 303 #else |
| 304 fprintf(stderr, "%s\n", strerror(errno)); |
| 305 std::exit(EXIT_FAILURE); |
| 306 #endif |
| 307 } |
| 308 #else |
| 309 // Generic cycles per second counter |
| 310 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
| 311 #endif |
| 312 } |
| 313 |
| 314 } // end namespace |
| 315 |
| 316 double CyclesPerSecond(void) { |
| 317 std::call_once(cpuinfo_init, InitializeSystemInfo); |
| 318 return cpuinfo_cycles_per_second; |
| 319 } |
| 320 |
| 321 int NumCPUs(void) { |
| 322 std::call_once(cpuinfo_init, InitializeSystemInfo); |
| 323 return cpuinfo_num_cpus; |
| 324 } |
| 325 |
// Length of a string literal, not counting the terminating NUL. The
// surrounding ""'s only concatenate with another literal, which catches
// people who don't pass in a literal for "str".
#define strliterallen(str) (sizeof("" str "") - 1)

// If the first `len` bytes at `str` start with `prefix`, evaluates to a
// pointer just past the prefix; otherwise evaluates to nullptr.
// Must use a string literal for prefix. `str` is evaluated more than once and
// is parenthesised so that compound expressions (e.g. a conditional) can be
// passed safely.
#define memprefix(str, len, prefix)                         \
  ((((len) >= strliterallen(prefix)) &&                     \
    std::memcmp((str), prefix, strliterallen(prefix)) == 0) \
       ? (str) + strliterallen(prefix)                      \
       : nullptr)
| 335 |
| 336 bool CpuScalingEnabled() { |
| 337 #ifndef BENCHMARK_OS_WINDOWS |
| 338 // On Linux, the CPUfreq subsystem exposes CPU information as files on the |
| 339 // local file system. If reading the exported files fails, then we may not be |
| 340 // running on Linux, so we silently ignore all the read errors. |
| 341 for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { |
| 342 std::string governor_file = |
| 343 StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); |
| 344 FILE* file = fopen(governor_file.c_str(), "r"); |
| 345 if (!file) break; |
| 346 char buff[16]; |
| 347 size_t bytes_read = fread(buff, 1, sizeof(buff), file); |
| 348 fclose(file); |
| 349 if (memprefix(buff, bytes_read, "performance") == nullptr) return true; |
| 350 } |
| 351 #endif |
| 352 return false; |
| 353 } |
| 354 |
| 355 } // end namespace benchmark |
OLD | NEW |