Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 // Windows Timer Primer | 6 // Windows Timer Primer |
| 7 // | 7 // |
| 8 // A good article: http://www.ddj.com/windows/184416651 | 8 // A good article: http://www.ddj.com/windows/184416651 |
| 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 | 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 |
| 10 // | 10 // |
| (...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 300 // TimeTicks ------------------------------------------------------------------ | 300 // TimeTicks ------------------------------------------------------------------ |
| 301 namespace { | 301 namespace { |
| 302 | 302 |
| 303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the | 303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the |
| 304 // mock function, and to avoid a static constructor. Assigning an import to a | 304 // mock function, and to avoid a static constructor. Assigning an import to a |
| 305 // function pointer directly would require setup code to fetch from the IAT. | 305 // function pointer directly would require setup code to fetch from the IAT. |
| 306 DWORD timeGetTimeWrapper() { | 306 DWORD timeGetTimeWrapper() { |
| 307 return timeGetTime(); | 307 return timeGetTime(); |
| 308 } | 308 } |
| 309 | 309 |
| 310 DWORD (*tick_function)(void) = &timeGetTimeWrapper; | 310 DWORD (*g_tick_function)(void) = &timeGetTimeWrapper; |
| 311 | 311 |
| 312 // Accumulation of time lost due to rollover (in milliseconds). | 312 // Accumulation of time lost due to rollover (in milliseconds). |
| 313 int64 rollover_ms = 0; | 313 int64 g_rollover_ms = 0; |
| 314 | 314 |
| 315 // The last timeGetTime value we saw, to detect rollover. | 315 // The last timeGetTime value we saw, to detect rollover. |
| 316 DWORD last_seen_now = 0; | 316 DWORD g_last_seen_now = 0; |
| 317 | 317 |
| 318 // Lock protecting rollover_ms and last_seen_now. | 318 // Lock protecting rollover_ms and last_seen_now. |
| 319 // Note: this is a global object, and we usually avoid these. However, the time | 319 // Note: this is a global object, and we usually avoid these. However, the time |
| 320 // code is low-level, and we don't want to use Singletons here (it would be too | 320 // code is low-level, and we don't want to use Singletons here (it would be too |
| 321 // easy to use a Singleton without even knowing it, and that may lead to many | 321 // easy to use a Singleton without even knowing it, and that may lead to many |
| 322 // gotchas). Its impact on startup time should be negligible due to low-level | 322 // gotchas). Its impact on startup time should be negligible due to low-level |
| 323 // nature of time code. | 323 // nature of time code. |
| 324 base::Lock rollover_lock; | 324 base::Lock g_rollover_lock; |
| 325 | 325 |
| 326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic | 326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic |
| 327 // because it returns the number of milliseconds since Windows has started, | 327 // because it returns the number of milliseconds since Windows has started, |
| 328 // which will roll over the 32-bit value every ~49 days. We try to track | 328 // which will roll over the 32-bit value every ~49 days. We try to track |
| 329 // rollover ourselves, which works if TimeTicks::Now() is called at least every | 329 // rollover ourselves, which works if TimeTicks::Now() is called at least every |
| 330 // 49 days. | 330 // 49 days. |
| 331 TimeDelta RolloverProtectedNow() { | 331 TimeTicks RolloverProtectedNow() { |
| 332 base::AutoLock locked(rollover_lock); | 332 base::AutoLock locked(g_rollover_lock); |
| 333 // We should hold the lock while calling tick_function to make sure that | 333 // We should hold the lock while calling tick_function to make sure that |
| 334 // last_seen_now stays correctly in sync. | 334 // last_seen_now stays correctly in sync. |
| 335 DWORD now = tick_function(); | 335 DWORD now = g_tick_function(); |
| 336 if (now < last_seen_now) | 336 if (now < g_last_seen_now) |
| 337 rollover_ms += 0x100000000I64; // ~49.7 days. | 337 g_rollover_ms += 0x100000000I64; // ~49.7 days. |
| 338 last_seen_now = now; | 338 g_last_seen_now = now; |
| 339 return TimeDelta::FromMilliseconds(now + rollover_ms); | 339 return TimeTicks() + TimeDelta::FromMilliseconds(now + g_rollover_ms); |
| 340 } | 340 } |
| 341 | 341 |
| 342 bool IsBuggyAthlon(const base::CPU& cpu) { | 342 // Discussion of tick counter options on Windows: |
| 343 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is | 343 // |
| 344 // unreliable. Fallback to low-res clock. | |
| 345 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
| 346 } | |
| 347 | |
| 348 // Overview of time counters: | |
| 349 // (1) CPU cycle counter. (Retrieved via RDTSC) | 344 // (1) CPU cycle counter. (Retrieved via RDTSC) |
| 350 // The CPU counter provides the highest resolution time stamp and is the least | 345 // The CPU counter provides the highest resolution time stamp and is the least |
| 351 // expensive to retrieve. However, the CPU counter is unreliable and should not | 346 // expensive to retrieve. However, the CPU counter is unreliable and should not |
| 352 // be used in production. Its biggest issue is that it is per processor and it | 347 // be used in production. Its biggest issue is that it is per processor and it |
| 353 // is not synchronized between processors. Also, on some computers, the counters | 348 // is not synchronized between processors. Also, on some computers, the counters |
| 354 // will change frequency due to thermal and power changes, and stop in some | 349 // will change frequency due to thermal and power changes, and stop in some |
| 355 // states. | 350 // states. |
| 356 // | 351 // |
| 357 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- | 352 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- |
| 358 // resolution (100 nanoseconds) time stamp but is comparatively more expensive | 353 // resolution (100 nanoseconds) time stamp but is comparatively more expensive |
| 359 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL | 354 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL |
| 360 // (with some help from ACPI). | 355 // (with some help from ACPI). |
| 361 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx | 356 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx |
| 362 // in the worst case, it gets the counter from the rollover interrupt on the | 357 // in the worst case, it gets the counter from the rollover interrupt on the |
| 363 // programmable interrupt timer. In best cases, the HAL may conclude that the | 358 // programmable interrupt timer. In best cases, the HAL may conclude that the |
| 364 // RDTSC counter runs at a constant frequency, then it uses that instead. On | 359 // RDTSC counter runs at a constant frequency, then it uses that instead. On |
| 365 // multiprocessor machines, it will try to verify the values returned from | 360 // multiprocessor machines, it will try to verify the values returned from |
| 366 // RDTSC on each processor are consistent with each other, and apply a handful | 361 // RDTSC on each processor are consistent with each other, and apply a handful |
| 367 // of workarounds for known buggy hardware. In other words, QPC is supposed to | 362 // of workarounds for known buggy hardware. In other words, QPC is supposed to |
| 368 // give consistent results on a multiprocessor computer, but it is unreliable in | 363 // give consistent results on a multiprocessor computer, but it is unreliable in |
| 369 // reality due to bugs in BIOS or HAL on some, especially old computers. | 364 // reality due to bugs in BIOS or HAL on some, especially old computers. |
| 370 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but | 365 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but |
| 371 // it should be used with caution. | 366 // it should be used with caution. |
| 372 // | 367 // |
| 373 // (3) System time. The system time provides a low-resolution (typically 10 | 368 // (3) System time. The system time provides a low-resolution (typically 10 |
| 374 // to 55 milliseconds) time stamp but is comparatively less expensive to | 369 // to 55 milliseconds) time stamp but is comparatively less expensive to |
| 375 // retrieve and more reliable. | 370 // retrieve and more reliable. |
| 376 class HighResNowSingleton { | |
| 377 public: | |
| 378 HighResNowSingleton() | |
| 379 : ticks_per_second_(0), | |
| 380 skew_(0) { | |
| 381 | 371 |
| 382 base::CPU cpu; | 372 using NowFunction = TimeTicks (*)(void); |
| 383 if (IsBuggyAthlon(cpu)) | |
| 384 return; | |
| 385 | 373 |
| 386 // Synchronize the QPC clock with GetSystemTimeAsFileTime. | 374 TimeTicks InitialNowFunction(); |
| 387 LARGE_INTEGER ticks_per_sec = {0}; | 375 TimeTicks InitialSystemTraceNowFunction(); |
| 388 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
| 389 return; // QPC is not available. | |
| 390 ticks_per_second_ = ticks_per_sec.QuadPart; | |
| 391 | 376 |
| 392 skew_ = UnreliableNow() - ReliableNow(); | 377 volatile NowFunction g_now_function = &InitialNowFunction; |
| 378 volatile NowFunction g_system_trace_now_function = | |
| 379 &InitialSystemTraceNowFunction; | |
| 380 int64 g_qpc_ticks_per_second = 0; | |
| 381 | |
| 382 TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) { | |
| 383 DCHECK_GT(g_qpc_ticks_per_second, 0); | |
| 384 | |
| 385 // If the QPC Value is below the overflow threshold, we proceed with | |
| 386 // simple multiply and divide. | |
| 387 if (qpc_value < Time::kQPCOverflowThreshold) { | |
| 388 return TimeDelta::FromMicroseconds( | |
| 389 qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second); | |
| 390 } | |
| 391 // Otherwise, calculate microseconds in a roundabout manner to avoid | |
| 392 // overflow and precision issues. | |
| 393 int64 whole_seconds = qpc_value / g_qpc_ticks_per_second; | |
| 394 int64 leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second); | |
| 395 return TimeDelta::FromMicroseconds( | |
| 396 (whole_seconds * Time::kMicrosecondsPerSecond) + | |
| 397 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
| 398 g_qpc_ticks_per_second)); | |
| 399 } | |
| 400 | |
| 401 TimeTicks QPCNow() { | |
| 402 LARGE_INTEGER now; | |
| 403 QueryPerformanceCounter(&now); | |
| 404 return TimeTicks() + QPCValueToTimeDelta(now.QuadPart); | |
| 405 } | |
| 406 | |
| 407 void InitializeNowFunctionPointers() { | |
| 408 LARGE_INTEGER ticks_per_sec = {0}; | |
| 409 if (QueryPerformanceFrequency(&ticks_per_sec)) | |
| 410 g_qpc_ticks_per_second = ticks_per_sec.QuadPart; | |
|
brianderson
2015/01/07 00:53:44
g_qpc_ticks_per_second is 64-bits.
According to h
DaleCurtis
2015/01/07 01:38:36
Yeah this seems fishy, for the same reason that th
miu
2015/01/07 05:22:45
Fixed. My solution is to get rid of all the Inter
| |
| 411 else | |
| 412 g_qpc_ticks_per_second = 0; | |
| 413 | |
| 414 // If Windows does not offer a working QPC implementation, both Now() and | |
| 415 // NowFromSystemTraceTime() must use the low-resolution clock. Note that | |
| 416 // Windows may report a working QPC implementation even on certain Athlon X2 | |
| 417 // CPUs (where QPC has been shown to be unreliable). | |
| 418 // | |
| 419 // Otherwise, if the CPU does not have a non-stop time counter, assume Windows | |
| 420 // will provide an alternate QPC implementation that works, but is expensive | |
| 421 // to use. In this case, Now() should use the inexpensive, low-resolution | |
| 422 // clock and NowFromSystemTraceTime() will use the expensive-but-working QPC | |
| 423 // clock. | |
| 424 // | |
| 425 // Otherwise, both Now functions can use the high-resolution QPC clock. As of | |
| 426 // 4 January 2015, ~68% of users fall within this category. | |
| 427 NowFunction now_function; | |
| 428 NowFunction system_trace_now_function; | |
| 429 base::CPU cpu; | |
| 430 if ((g_qpc_ticks_per_second <= 0) || | |
| 431 (cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15)) { | |
|
brianderson
2015/01/07 00:53:44
I liked the IsBuggyAthlon function, it would make
miu
2015/01/07 05:22:45
Done.
| |
| 432 now_function = system_trace_now_function = &RolloverProtectedNow; | |
|
brianderson
2015/01/07 00:53:44
I also think system trace should always use HighRe
miu
2015/01/07 05:22:45
Done.
| |
| 433 } else if (!cpu.has_non_stop_time_stamp_counter()) { | |
| 434 now_function = &RolloverProtectedNow; | |
| 435 system_trace_now_function = &QPCNow; | |
| 436 } else { | |
| 437 now_function = system_trace_now_function = &QPCNow; | |
| 393 } | 438 } |
| 394 | 439 |
| 395 bool IsUsingHighResClock() { | 440 InterlockedExchangePointer( |
| 396 return ticks_per_second_ != 0; | 441 reinterpret_cast<void* volatile*>(&g_now_function), |
| 397 } | 442 now_function); |
| 398 | 443 InterlockedExchangePointer( |
| 399 TimeDelta Now() { | 444 reinterpret_cast<void* volatile*>(&g_system_trace_now_function), |
| 400 if (IsUsingHighResClock()) | 445 system_trace_now_function); |
| 401 return TimeDelta::FromMicroseconds(UnreliableNow()); | |
| 402 | |
| 403 // Just fallback to the slower clock. | |
| 404 return RolloverProtectedNow(); | |
| 405 } | |
| 406 | |
| 407 int64 GetQPCDriftMicroseconds() { | |
| 408 if (!IsUsingHighResClock()) | |
| 409 return 0; | |
| 410 return abs((UnreliableNow() - ReliableNow()) - skew_); | |
| 411 } | |
| 412 | |
| 413 int64 QPCValueToMicroseconds(LONGLONG qpc_value) { | |
| 414 if (!ticks_per_second_) | |
| 415 return 0; | |
| 416 // If the QPC Value is below the overflow threshold, we proceed with | |
| 417 // simple multiply and divide. | |
| 418 if (qpc_value < Time::kQPCOverflowThreshold) | |
| 419 return qpc_value * Time::kMicrosecondsPerSecond / ticks_per_second_; | |
| 420 // Otherwise, calculate microseconds in a roundabout manner to avoid | 485 &HighResNowWrapper); |
| 421 // overflow and precision issues. | |
| 422 int64 whole_seconds = qpc_value / ticks_per_second_; | |
| 423 int64 leftover_ticks = qpc_value - (whole_seconds * ticks_per_second_); | |
| 424 int64 microseconds = (whole_seconds * Time::kMicrosecondsPerSecond) + | |
| 425 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
| 426 ticks_per_second_); | |
| 427 return microseconds; | |
| 428 } | |
| 429 | |
| 430 private: | |
| 431 // Get the number of microseconds since boot in an unreliable fashion. | |
| 432 int64 UnreliableNow() { | |
| 433 LARGE_INTEGER now; | |
| 434 QueryPerformanceCounter(&now); | |
| 435 return QPCValueToMicroseconds(now.QuadPart); | |
| 436 } | |
| 437 | |
| 438 // Get the number of microseconds since boot in a reliable fashion. | |
| 439 int64 ReliableNow() { | |
| 440 return RolloverProtectedNow().InMicroseconds(); | |
| 441 } | |
| 442 | |
| 443 int64 ticks_per_second_; // 0 indicates QPF failed and we're broken. | |
| 444 int64 skew_; // Skew between lo-res and hi-res clocks (for debugging). | |
| 445 }; | |
| 446 | |
| 447 static base::LazyInstance<HighResNowSingleton>::Leaky | |
|
brianderson
2015/01/07 00:53:44
You are no longer using a LazyInstance for QPC. We
miu
2015/01/07 05:22:45
Acknowledged.
| |
| 448 leaky_high_res_now_singleton = LAZY_INSTANCE_INITIALIZER; | |
| 449 | |
| 450 HighResNowSingleton* GetHighResNowSingleton() { | |
| 451 return leaky_high_res_now_singleton.Pointer(); | |
| 452 } | 446 } |
| 453 | 447 |
| 454 TimeDelta HighResNowWrapper() { | 448 TimeTicks InitialNowFunction() { |
| 455 return GetHighResNowSingleton()->Now(); | 449 InitializeNowFunctionPointers(); |
| 450 return g_now_function(); | |
| 456 } | 451 } |
| 457 | 452 |
| 458 typedef TimeDelta (*NowFunction)(void); | 453 TimeTicks InitialSystemTraceNowFunction() { |
| 459 | 454 InitializeNowFunctionPointers(); |
| 460 bool CPUReliablySupportsHighResTime() { | 455 return g_system_trace_now_function(); |
| 461 base::CPU cpu; | |
| 462 if (!cpu.has_non_stop_time_stamp_counter() || | |
| 463 !GetHighResNowSingleton()->IsUsingHighResClock()) | |
| 464 return false; | |
| 465 | |
| 466 if (IsBuggyAthlon(cpu)) | |
| 467 return false; | |
| 468 | |
| 469 return true; | |
| 470 } | |
| 471 | |
| 472 TimeDelta InitialNowFunction(); | |
| 473 | |
| 474 volatile NowFunction now_function = InitialNowFunction; | |
| 475 | |
| 476 TimeDelta InitialNowFunction() { | |
| 477 if (!CPUReliablySupportsHighResTime()) { | |
| 478 InterlockedExchangePointer( | |
| 479 reinterpret_cast<void* volatile*>(&now_function), | |
| 480 &RolloverProtectedNow); | |
| 481 return RolloverProtectedNow(); | |
| 482 } | |
| 483 InterlockedExchangePointer( | |
| 484 reinterpret_cast<void* volatile*>(&now_function), | |
| 485 &HighResNowWrapper); | |
| 486 return HighResNowWrapper(); | |
| 487 } | 456 } |
| 488 | 457 |
| 489 } // namespace | 458 } // namespace |
| 490 | 459 |
| 491 // static | 460 // static |
| 492 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( | 461 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( |
| 493 TickFunctionType ticker) { | 462 TickFunctionType ticker) { |
| 494 base::AutoLock locked(rollover_lock); | 463 base::AutoLock locked(g_rollover_lock); |
| 495 TickFunctionType old = tick_function; | 464 TickFunctionType old = g_tick_function; |
| 496 tick_function = ticker; | 465 g_tick_function = ticker; |
| 497 rollover_ms = 0; | 466 g_rollover_ms = 0; |
| 498 last_seen_now = 0; | 467 g_last_seen_now = 0; |
| 499 return old; | 468 return old; |
| 500 } | 469 } |
| 501 | 470 |
| 502 // static | 471 // static |
| 503 TimeTicks TimeTicks::Now() { | 472 TimeTicks TimeTicks::Now() { |
| 504 return TimeTicks() + now_function(); | 473 return g_now_function(); |
| 505 } | 474 } |
| 506 | 475 |
| 507 // static | 476 // static |
| 508 TimeTicks TimeTicks::HighResNow() { | 477 bool TimeTicks::IsHighResolution() { |
| 509 return TimeTicks() + HighResNowWrapper(); | 478 if (g_now_function == &InitialNowFunction) |
|
DaleCurtis
2015/01/07 01:38:36
This needs an interlocked read.
miu
2015/01/07 05:22:45
With the new, simpler memory-ordering model (PS3),
| |
| 479 InitializeNowFunctionPointers(); | |
| 480 return g_now_function == &QPCNow; | |
| 510 } | 481 } |
| 511 | 482 |
| 512 // static | 483 // static |
| 513 bool TimeTicks::IsHighResNowFastAndReliable() { | |
| 514 return CPUReliablySupportsHighResTime(); | |
| 515 } | |
| 516 | |
| 517 // static | |
| 518 TimeTicks TimeTicks::ThreadNow() { | 484 TimeTicks TimeTicks::ThreadNow() { |
| 519 NOTREACHED(); | 485 NOTREACHED(); |
| 520 return TimeTicks(); | 486 return TimeTicks(); |
| 521 } | 487 } |
| 522 | 488 |
| 523 // static | 489 // static |
| 524 TimeTicks TimeTicks::NowFromSystemTraceTime() { | 490 TimeTicks TimeTicks::NowFromSystemTraceTime() { |
| 525 return HighResNow(); | 491 return g_system_trace_now_function(); |
| 526 } | |
| 527 | |
| 528 // static | |
| 529 int64 TimeTicks::GetQPCDriftMicroseconds() { | |
| 530 return GetHighResNowSingleton()->GetQPCDriftMicroseconds(); | |
| 531 } | 492 } |
| 532 | 493 |
| 533 // static | 494 // static |
| 534 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { | 495 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { |
| 535 return TimeTicks(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 496 return TimeTicks() + QPCValueToTimeDelta(qpc_value); |
|
DaleCurtis
2015/01/07 01:38:36
Any chance someone might call FromQPCValue() below
miu
2015/01/07 05:22:45
No. I changed the header comment in time.h to exp
| |
| 536 } | |
| 537 | |
| 538 // static | |
| 539 bool TimeTicks::IsHighResClockWorking() { | |
| 540 return GetHighResNowSingleton()->IsUsingHighResClock(); | |
| 541 } | 497 } |
| 542 | 498 |
| 543 // TimeDelta ------------------------------------------------------------------ | 499 // TimeDelta ------------------------------------------------------------------ |
| 544 | 500 |
| 545 // static | 501 // static |
| 546 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { | 502 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { |
| 547 return TimeDelta(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 503 return QPCValueToTimeDelta(qpc_value); |
| 548 } | 504 } |
| OLD | NEW |