Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 // Windows Timer Primer | 6 // Windows Timer Primer |
| 7 // | 7 // |
| 8 // A good article: http://www.ddj.com/windows/184416651 | 8 // A good article: http://www.ddj.com/windows/184416651 |
| 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 | 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 |
| 10 // | 10 // |
| (...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 300 // TimeTicks ------------------------------------------------------------------ | 300 // TimeTicks ------------------------------------------------------------------ |
| 301 namespace { | 301 namespace { |
| 302 | 302 |
| 303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the | 303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the |
| 304 // mock function, and to avoid a static constructor. Assigning an import to a | 304 // mock function, and to avoid a static constructor. Assigning an import to a |
| 305 // function pointer directly would require setup code to fetch from the IAT. | 305 // function pointer directly would require setup code to fetch from the IAT. |
| 306 DWORD timeGetTimeWrapper() { | 306 DWORD timeGetTimeWrapper() { |
| 307 return timeGetTime(); | 307 return timeGetTime(); |
| 308 } | 308 } |
| 309 | 309 |
| 310 DWORD (*tick_function)(void) = &timeGetTimeWrapper; | 310 DWORD (*g_tick_function)(void) = &timeGetTimeWrapper; |
| 311 | 311 |
| 312 // Accumulation of time lost due to rollover (in milliseconds). | 312 // Accumulation of time lost due to rollover (in milliseconds). |
| 313 int64 rollover_ms = 0; | 313 int64 g_rollover_ms = 0; |
| 314 | 314 |
| 315 // The last timeGetTime value we saw, to detect rollover. | 315 // The last timeGetTime value we saw, to detect rollover. |
| 316 DWORD last_seen_now = 0; | 316 DWORD g_last_seen_now = 0; |
| 317 | 317 |
| 318 // Lock protecting rollover_ms and last_seen_now. | 318 // Lock protecting g_rollover_ms and g_last_seen_now. |
| 319 // Note: this is a global object, and we usually avoid these. However, the time | 319 // Note: this is a global object, and we usually avoid these. However, the time |
| 320 // code is low-level, and we don't want to use Singletons here (it would be too | 320 // code is low-level, and we don't want to use Singletons here (it would be too |
| 321 // easy to use a Singleton without even knowing it, and that may lead to many | 321 // easy to use a Singleton without even knowing it, and that may lead to many |
| 322 // gotchas). Its impact on startup time should be negligible due to low-level | 322 // gotchas). Its impact on startup time should be negligible due to low-level |
| 323 // nature of time code. | 323 // nature of time code. |
| 324 base::Lock rollover_lock; | 324 base::Lock g_rollover_lock; |
| 325 | 325 |
| 326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic | 326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic |
| 327 // because it returns the number of milliseconds since Windows has started, | 327 // because it returns the number of milliseconds since Windows has started, |
| 328 // which will roll over the 32-bit value every ~49 days. We try to track | 328 // which will roll over the 32-bit value every ~49 days. We try to track |
| 329 // rollover ourselves, which works if TimeTicks::Now() is called at least every | 329 // rollover ourselves, which works if TimeTicks::Now() is called at least every |
| 330 // 49 days. | 330 // 49 days. |
| 331 TimeDelta RolloverProtectedNow() { | 331 TimeTicks RolloverProtectedNow() { |
| 332 base::AutoLock locked(rollover_lock); | 332 base::AutoLock locked(g_rollover_lock); |
| 333 // We should hold the lock while calling tick_function to make sure that | 333 // We should hold the lock while calling g_tick_function to make sure that |
| 334 // we keep last_seen_now correctly in sync. | 334 // we keep g_last_seen_now correctly in sync. |
| 335 DWORD now = tick_function(); | 335 DWORD now = g_tick_function(); |
| 336 if (now < last_seen_now) | 336 if (now < g_last_seen_now) |
| 337 rollover_ms += 0x100000000I64; // ~49.7 days. | 337 g_rollover_ms += 0x100000000I64; // ~49.7 days. |
| 338 last_seen_now = now; | 338 g_last_seen_now = now; |
| 339 return TimeDelta::FromMilliseconds(now + rollover_ms); | 339 return TimeTicks() + TimeDelta::FromMilliseconds(now + g_rollover_ms); |
| 340 } | 340 } |
| 341 | 341 |
| 342 bool IsBuggyAthlon(const base::CPU& cpu) { | 342 // Discussion of tick counter options on Windows: |
| 343 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is | 343 // |
| 344 // unreliable. Fallback to low-res clock. | |
| 345 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
| 346 } | |
| 347 | |
| 348 // Overview of time counters: | |
| 349 // (1) CPU cycle counter. (Retrieved via RDTSC) | 344 // (1) CPU cycle counter. (Retrieved via RDTSC) |
| 350 // The CPU counter provides the highest resolution time stamp and is the least | 345 // The CPU counter provides the highest resolution time stamp and is the least |
| 351 // expensive to retrieve. However, the CPU counter is unreliable and should not | 346 // expensive to retrieve. However, on older CPUs, two issues can affect its |
| 352 // be used in production. Its biggest issue is that it is per processor and it | 347 // reliability: First it is maintained per processor and not synchronized |
| 353 // is not synchronized between processors. Also, on some computers, the counters | 348 // between processors. Also, the counters will change frequency due to thermal |
| 354 // will change frequency due to thermal and power changes, and stop in some | 349 // and power changes, and stop in some states. |
| 355 // states. | |
| 356 // | 350 // |
| 357 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- | 351 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- |
| 358 // resolution (100 nanoseconds) time stamp but is comparatively more expensive | 352 // resolution (<1 microsecond) time stamp. On most hardware running today, it |
| 359 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL. | 353 // auto-detects and uses the constant-rate RDTSC counter to provide extremely |
| 360 // (with some help from ACPI). | 354 // efficient and reliable time stamps. |
| 361 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx | 355 // |
| 362 // in the worst case, it gets the counter from the rollover interrupt on the | 356 // On older CPUs where RDTSC is unreliable, it falls back to using more |
| 357 // expensive (20X to 40X more costly) alternate clocks, such as HPET or the ACPI | |
| 358 // PM timer, and can involve system calls; and all this is up to the HAL (with | |
| 359 // some help from ACPI). According to | |
| 360 // http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx, in the | |
| 361 // worst case, it gets the counter from the rollover interrupt on the | |
| 363 // programmable interrupt timer. In best cases, the HAL may conclude that the | 362 // programmable interrupt timer. In best cases, the HAL may conclude that the |
| 364 // RDTSC counter runs at a constant frequency, then it uses that instead. On | 363 // RDTSC counter runs at a constant frequency, then it uses that instead. On |
| 365 // multiprocessor machines, it will try to verify the values returned from | 364 // multiprocessor machines, it will try to verify the values returned from |
| 366 // RDTSC on each processor are consistent with each other, and apply a handful | 365 // RDTSC on each processor are consistent with each other, and apply a handful |
| 367 // of workarounds for known buggy hardware. In other words, QPC is supposed to | 366 // of workarounds for known buggy hardware. In other words, QPC is supposed to |
| 368 // give consistent result on a multiprocessor computer, but it is unreliable in | 367 // give consistent results on a multiprocessor computer, but for older CPUs it |
| 369 // reality due to bugs in BIOS or HAL on some, especially old computers. | 368 // can be unreliable due to bugs in BIOS or HAL. |
| 370 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but | |
| 371 // it should be used with caution. | |
| 372 // | 369 // |
| 373 // (3) System time. The system time provides a low-resolution (typically 10ms | 370 // (3) System time. The system time provides a low-resolution (from ~1 to ~15.6 |
| 374 // to 55 milliseconds) time stamp but is comparatively less expensive to | 371 // milliseconds) time stamp but is comparatively less expensive to retrieve and |
| 375 // retrieve and more reliable. | 372 // more reliable. Time::EnableHighResolutionTimer() and |
| 376 class HighResNowSingleton { | 373 // Time::ActivateHighResolutionTimer() can be called to alter the resolution of |
| 377 public: | 374 // this timer; and also other Windows applications can alter it, affecting this |
| 378 HighResNowSingleton() | 375 // one. |
| 379 : ticks_per_second_(0), | |
| 380 skew_(0) { | |
| 381 | 376 |
| 382 base::CPU cpu; | 377 using NowFunction = TimeTicks (*)(void); |
| 383 if (IsBuggyAthlon(cpu)) | |
| 384 return; | |
| 385 | 378 |
| 386 // Synchronize the QPC clock with GetSystemTimeAsFileTime. | 379 TimeTicks InitialNowFunction(); |
| 387 LARGE_INTEGER ticks_per_sec = {0}; | 380 TimeTicks InitialSystemTraceNowFunction(); |
| 388 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
| 389 return; // QPC is not available. | |
| 390 ticks_per_second_ = ticks_per_sec.QuadPart; | |
| 391 | 381 |
| 392 skew_ = UnreliableNow() - ReliableNow(); | 382 // See "threading notes" in InitializeNowFunctionPointers() for details on how |
| 383 // concurrent reads/writes to these globals have been made safe. | |
| 384 NowFunction g_now_function = &InitialNowFunction; | |
| 385 NowFunction g_system_trace_now_function = &InitialSystemTraceNowFunction; | |
| 386 int64 g_qpc_ticks_per_second = 0; | |
| 387 | |
| 388 // As of January 2015, use of <atomic> is forbidden in Chromium code. This is | |
| 389 // what std::atomic_thread_fence does on Windows on all Intel architectures: | |
|
brucedawson
2015/01/08 19:37:48
To be clear, this is what atomic_thread_fence does
miu
2015/01/08 19:45:56
Done.
| |
| 390 #define atomic_thread_fence(memory_order) _ReadWriteBarrier(); | |
|
jamesr
2015/01/08 20:31:54
this doesn't follow macro naming rules: http://goo
miu
2015/01/08 22:16:00
Done. Fixed capitalization.
| |
| 391 | |
| 392 TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) { | |
| 393 // Ensure that the assignment to |g_qpc_ticks_per_second|, made in | |
| 394 // InitializeNowFunctionPointers(), has happened by this point. | |
| 395 atomic_thread_fence(memory_order_acquire); | |
| 396 | |
| 397 DCHECK_GT(g_qpc_ticks_per_second, 0); | |
| 398 | |
| 399 // If the QPC Value is below the overflow threshold, we proceed with | |
| 400 // simple multiply and divide. | |
| 401 if (qpc_value < Time::kQPCOverflowThreshold) { | |
| 402 return TimeDelta::FromMicroseconds( | |
| 403 qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second); | |
| 404 } | |
| 405 // Otherwise, calculate microseconds in a roundabout manner to avoid | |
| 406 // overflow and precision issues. | |
| 407 int64 whole_seconds = qpc_value / g_qpc_ticks_per_second; | |
| 408 int64 leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second); | |
| 409 return TimeDelta::FromMicroseconds( | |
| 410 (whole_seconds * Time::kMicrosecondsPerSecond) + | |
| 411 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
| 412 g_qpc_ticks_per_second)); | |
| 413 } | |
| 414 | |
| 415 TimeTicks QPCNow() { | |
| 416 LARGE_INTEGER now; | |
| 417 QueryPerformanceCounter(&now); | |
| 418 return TimeTicks() + QPCValueToTimeDelta(now.QuadPart); | |
| 419 } | |
| 420 | |
| 421 bool IsBuggyAthlon(const base::CPU& cpu) { | |
| 422 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is unreliable. | |
| 423 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
| 424 } | |
| 425 | |
| 426 void InitializeNowFunctionPointers() { | |
| 427 LARGE_INTEGER ticks_per_sec = {0}; | |
| 428 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
| 429 ticks_per_sec.QuadPart = 0; | |
| 430 | |
| 431 // If Windows cannot provide a QPC implementation, both Now() and | |
| 432 // NowFromSystemTraceTime() must use the low-resolution clock. | |
| 433 // | |
| 434 // If the QPC implementation is expensive and/or unreliable, Now() will use | |
| 435 // the low-resolution clock, but NowFromSystemTraceTime() will use the QPC (in | |
| 436 // the hope that it is still useful for tracing purposes). A CPU lacking a | |
| 437 // non-stop time counter will cause Windows to provide an alternate QPC | |
| 438 // implementation that works, but is expensive to use. Certain Athlon CPUs are | |
| 439 // known to make the QPC implementation unreliable. | |
| 440 // | |
| 441 // Otherwise, both Now functions can use the high-resolution QPC clock. As of | |
| 442 // 4 January 2015, ~68% of users fall within this category. | |
| 443 NowFunction now_function; | |
| 444 NowFunction system_trace_now_function; | |
| 445 base::CPU cpu; | |
| 446 if (ticks_per_sec.QuadPart <= 0) { | |
| 447 now_function = system_trace_now_function = &RolloverProtectedNow; | |
| 448 } else if (!cpu.has_non_stop_time_stamp_counter() || IsBuggyAthlon(cpu)) { | |
| 449 now_function = &RolloverProtectedNow; | |
| 450 system_trace_now_function = &QPCNow; | |
| 451 } else { | |
| 452 now_function = system_trace_now_function = &QPCNow; | |
| 393 } | 453 } |
| 394 | 454 |
| 395 bool IsUsingHighResClock() { | 455 // Threading note 1: In an unlikely race condition, it's possible for two or |
| 396 return ticks_per_second_ != 0; | 456 // more threads to enter InitializeNowFunctionPointers() in parallel. This is |
| 397 } | 457 // not a problem since all threads should end up writing out the same values |
| 398 | 458 // to the global variables. |
| 399 TimeDelta Now() { | 459 // |
| 400 if (IsUsingHighResClock()) | 460 // Threading note 2: A release fence is placed here to ensure, from the |
| 401 return TimeDelta::FromMicroseconds(UnreliableNow()); | 461 // perspective of other threads using the function pointers, that the |
| 402 | 462 // assignment to |g_qpc_ticks_per_second| happens before the function pointers |
| 403 // Just fallback to the slower clock. | 463 // are changed. |
| 404 return RolloverProtectedNow(); | 464 g_qpc_ticks_per_second = ticks_per_sec.QuadPart; |
| 405 } | 465 atomic_thread_fence(memory_order_release); |
| 406 | 466 g_now_function = now_function; |
| 407 int64 GetQPCDriftMicroseconds() { | 467 g_system_trace_now_function = system_trace_now_function; |
| 408 if (!IsUsingHighResClock()) | |
| 409 return 0; | |
| 410 return abs((UnreliableNow() - ReliableNow()) - skew_); | |
| 411 } | |
| 412 | |
| 413 int64 QPCValueToMicroseconds(LONGLONG qpc_value) { | |
| 414 if (!ticks_per_second_) | |
| 415 return 0; | |
| 416 // If the QPC Value is below the overflow threshold, we proceed with | |
| 417 // simple multiply and divide. | |
| 418 if (qpc_value < Time::kQPCOverflowThreshold) | |
| 419 return qpc_value * Time::kMicrosecondsPerSecond / ticks_per_second_; | |
| 420 // Otherwise, calculate microseconds in a roundabout manner to avoid | |
| 421 // overflow and precision issues. | |
| 422 int64 whole_seconds = qpc_value / ticks_per_second_; | |
| 423 int64 leftover_ticks = qpc_value - (whole_seconds * ticks_per_second_); | |
| 424 int64 microseconds = (whole_seconds * Time::kMicrosecondsPerSecond) + | |
| 425 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
| 426 ticks_per_second_); | |
| 427 return microseconds; | |
| 428 } | |
| 429 | |
| 430 private: | |
| 431 // Get the number of microseconds since boot in an unreliable fashion. | |
| 432 int64 UnreliableNow() { | |
| 433 LARGE_INTEGER now; | |
| 434 QueryPerformanceCounter(&now); | |
| 435 return QPCValueToMicroseconds(now.QuadPart); | |
| 436 } | |
| 437 | |
| 438 // Get the number of microseconds since boot in a reliable fashion. | |
| 439 int64 ReliableNow() { | |
| 440 return RolloverProtectedNow().InMicroseconds(); | |
| 441 } | |
| 442 | |
| 443 int64 ticks_per_second_; // 0 indicates QPF failed and we're broken. | |
| 444 int64 skew_; // Skew between lo-res and hi-res clocks (for debugging). | |
| 445 }; | |
| 446 | |
| 447 static base::LazyInstance<HighResNowSingleton>::Leaky | |
| 448 leaky_high_res_now_singleton = LAZY_INSTANCE_INITIALIZER; | |
| 449 | |
| 450 HighResNowSingleton* GetHighResNowSingleton() { | |
| 451 return leaky_high_res_now_singleton.Pointer(); | |
| 452 } | 468 } |
| 453 | 469 |
| 454 TimeDelta HighResNowWrapper() { | 470 TimeTicks InitialNowFunction() { |
| 455 return GetHighResNowSingleton()->Now(); | 471 InitializeNowFunctionPointers(); |
| 472 return g_now_function(); | |
| 456 } | 473 } |
| 457 | 474 |
| 458 typedef TimeDelta (*NowFunction)(void); | 475 TimeTicks InitialSystemTraceNowFunction() { |
| 459 | 476 InitializeNowFunctionPointers(); |
| 460 bool CPUReliablySupportsHighResTime() { | 477 return g_system_trace_now_function(); |
| 461 base::CPU cpu; | |
| 462 if (!cpu.has_non_stop_time_stamp_counter() || | |
| 463 !GetHighResNowSingleton()->IsUsingHighResClock()) | |
| 464 return false; | |
| 465 | |
| 466 if (IsBuggyAthlon(cpu)) | |
| 467 return false; | |
| 468 | |
| 469 return true; | |
| 470 } | |
| 471 | |
| 472 TimeDelta InitialNowFunction(); | |
| 473 | |
| 474 volatile NowFunction now_function = InitialNowFunction; | |
| 475 | |
| 476 TimeDelta InitialNowFunction() { | |
| 477 if (!CPUReliablySupportsHighResTime()) { | |
| 478 InterlockedExchangePointer( | |
| 479 reinterpret_cast<void* volatile*>(&now_function), | |
| 480 &RolloverProtectedNow); | |
| 481 return RolloverProtectedNow(); | |
| 482 } | |
| 483 InterlockedExchangePointer( | |
| 484 reinterpret_cast<void* volatile*>(&now_function), | |
| 485 &HighResNowWrapper); | |
| 486 return HighResNowWrapper(); | |
| 487 } | 478 } |
| 488 | 479 |
| 489 } // namespace | 480 } // namespace |
| 490 | 481 |
| 491 // static | 482 // static |
| 492 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( | 483 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( |
| 493 TickFunctionType ticker) { | 484 TickFunctionType ticker) { |
| 494 base::AutoLock locked(rollover_lock); | 485 base::AutoLock locked(g_rollover_lock); |
| 495 TickFunctionType old = tick_function; | 486 TickFunctionType old = g_tick_function; |
| 496 tick_function = ticker; | 487 g_tick_function = ticker; |
| 497 rollover_ms = 0; | 488 g_rollover_ms = 0; |
| 498 last_seen_now = 0; | 489 g_last_seen_now = 0; |
| 499 return old; | 490 return old; |
| 500 } | 491 } |
| 501 | 492 |
| 502 // static | 493 // static |
| 503 TimeTicks TimeTicks::Now() { | 494 TimeTicks TimeTicks::Now() { |
| 504 return TimeTicks() + now_function(); | 495 return g_now_function(); |
| 505 } | 496 } |
| 506 | 497 |
| 507 // static | 498 // static |
| 508 TimeTicks TimeTicks::HighResNow() { | 499 bool TimeTicks::IsHighResolution() { |
| 509 return TimeTicks() + HighResNowWrapper(); | 500 if (g_now_function == &InitialNowFunction) |
| 501 InitializeNowFunctionPointers(); | |
| 502 return g_now_function == &QPCNow; | |
| 510 } | 503 } |
| 511 | 504 |
| 512 // static | 505 // static |
| 513 bool TimeTicks::IsHighResNowFastAndReliable() { | |
| 514 return CPUReliablySupportsHighResTime(); | |
| 515 } | |
| 516 | |
| 517 // static | |
| 518 TimeTicks TimeTicks::ThreadNow() { | 506 TimeTicks TimeTicks::ThreadNow() { |
| 519 NOTREACHED(); | 507 NOTREACHED(); |
| 520 return TimeTicks(); | 508 return TimeTicks(); |
| 521 } | 509 } |
| 522 | 510 |
| 523 // static | 511 // static |
| 524 TimeTicks TimeTicks::NowFromSystemTraceTime() { | 512 TimeTicks TimeTicks::NowFromSystemTraceTime() { |
| 525 return HighResNow(); | 513 return g_system_trace_now_function(); |
| 526 } | |
| 527 | |
| 528 // static | |
| 529 int64 TimeTicks::GetQPCDriftMicroseconds() { | |
| 530 return GetHighResNowSingleton()->GetQPCDriftMicroseconds(); | |
| 531 } | 514 } |
| 532 | 515 |
| 533 // static | 516 // static |
| 534 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { | 517 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { |
| 535 return TimeTicks(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 518 return TimeTicks() + QPCValueToTimeDelta(qpc_value); |
| 536 } | |
| 537 | |
| 538 // static | |
| 539 bool TimeTicks::IsHighResClockWorking() { | |
| 540 return GetHighResNowSingleton()->IsUsingHighResClock(); | |
| 541 } | 519 } |
| 542 | 520 |
| 543 // TimeDelta ------------------------------------------------------------------ | 521 // TimeDelta ------------------------------------------------------------------ |
| 544 | 522 |
| 545 // static | 523 // static |
| 546 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { | 524 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { |
| 547 return TimeDelta(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 525 return QPCValueToTimeDelta(qpc_value); |
| 548 } | 526 } |
| OLD | NEW |