OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 // Windows Timer Primer | 6 // Windows Timer Primer |
7 // | 7 // |
8 // A good article: http://www.ddj.com/windows/184416651 | 8 // A good article: http://www.ddj.com/windows/184416651 |
9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 | 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 |
10 // | 10 // |
(...skipping 19 matching lines...) Expand all Loading... | |
30 // To work around all this, we're going to generally use timeGetTime(). We | 30 // To work around all this, we're going to generally use timeGetTime(). We |
31 // will only increase the system-wide timer if we're not running on battery | 31 // will only increase the system-wide timer if we're not running on battery |
32 // power. | 32 // power. |
33 | 33 |
34 #include "base/time/time.h" | 34 #include "base/time/time.h" |
35 | 35 |
36 #pragma comment(lib, "winmm.lib") | 36 #pragma comment(lib, "winmm.lib") |
37 #include <windows.h> | 37 #include <windows.h> |
38 #include <mmsystem.h> | 38 #include <mmsystem.h> |
39 | 39 |
40 #include "base/atomicops.h" | |
40 #include "base/basictypes.h" | 41 #include "base/basictypes.h" |
41 #include "base/cpu.h" | 42 #include "base/cpu.h" |
42 #include "base/lazy_instance.h" | 43 #include "base/lazy_instance.h" |
43 #include "base/logging.h" | 44 #include "base/logging.h" |
44 #include "base/synchronization/lock.h" | 45 #include "base/synchronization/lock.h" |
45 | 46 |
46 using base::Time; | 47 using base::Time; |
47 using base::TimeDelta; | 48 using base::TimeDelta; |
48 using base::TimeTicks; | 49 using base::TimeTicks; |
49 | 50 |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
300 // TimeTicks ------------------------------------------------------------------ | 301 // TimeTicks ------------------------------------------------------------------ |
301 namespace { | 302 namespace { |
302 | 303 |
303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the | 304 // We define a wrapper to adapt between the __stdcall and __cdecl call of the |
304 // mock function, and to avoid a static constructor. Assigning an import to a | 305 // mock function, and to avoid a static constructor. Assigning an import to a |
305 // function pointer directly would require setup code to fetch from the IAT. | 306 // function pointer directly would require setup code to fetch from the IAT. |
306 DWORD timeGetTimeWrapper() { | 307 DWORD timeGetTimeWrapper() { |
307 return timeGetTime(); | 308 return timeGetTime(); |
308 } | 309 } |
309 | 310 |
310 DWORD (*tick_function)(void) = &timeGetTimeWrapper; | 311 DWORD (*g_tick_function)(void) = &timeGetTimeWrapper; |
311 | 312 |
312 // Accumulation of time lost due to rollover (in milliseconds). | 313 // Accumulation of time lost due to rollover (in milliseconds). |
313 int64 rollover_ms = 0; | 314 int64 g_rollover_ms = 0; |
314 | 315 |
315 // The last timeGetTime value we saw, to detect rollover. | 316 // The last timeGetTime value we saw, to detect rollover. |
316 DWORD last_seen_now = 0; | 317 DWORD g_last_seen_now = 0; |
317 | 318 |
318 // Lock protecting rollover_ms and last_seen_now. | 319 // Lock protecting g_rollover_ms and g_last_seen_now. |
319 // Note: this is a global object, and we usually avoid these. However, the time | 320 // Note: this is a global object, and we usually avoid these. However, the time |
320 // code is low-level, and we don't want to use Singletons here (it would be too | 321 // code is low-level, and we don't want to use Singletons here (it would be too |
321 // easy to use a Singleton without even knowing it, and that may lead to many | 322 // easy to use a Singleton without even knowing it, and that may lead to many |
322 // gotchas). Its impact on startup time should be negligible due to low-level | 323 // gotchas). Its impact on startup time should be negligible due to low-level |
323 // nature of time code. | 324 // nature of time code. |
324 base::Lock rollover_lock; | 325 base::Lock g_rollover_lock; |
325 | 326 |
326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic | 327 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic |
327 // because it returns the number of milliseconds since Windows has started, | 328 // because it returns the number of milliseconds since Windows has started, |
328 // which will roll over the 32-bit value every ~49 days. We try to track | 329 // which will roll over the 32-bit value every ~49 days. We try to track |
329 // rollover ourselves, which works if TimeTicks::Now() is called at least every | 330 // rollover ourselves, which works if TimeTicks::Now() is called at least every |
330 // 49 days. | 331 // 49 days. |
331 TimeDelta RolloverProtectedNow() { | 332 TimeTicks RolloverProtectedNow() { |
332 base::AutoLock locked(rollover_lock); | 333 base::AutoLock locked(g_rollover_lock); |
333 // We should hold the lock while calling tick_function to make sure that | 334 // We should hold the lock while calling g_tick_function to make sure that |
334 // we keep last_seen_now stay correctly in sync. | 335 // g_last_seen_now stays correctly in sync. |
335 DWORD now = tick_function(); | 336 DWORD now = g_tick_function(); |
336 if (now < last_seen_now) | 337 if (now < g_last_seen_now) |
337 rollover_ms += 0x100000000I64; // ~49.7 days. | 338 g_rollover_ms += 0x100000000I64; // ~49.7 days. |
338 last_seen_now = now; | 339 g_last_seen_now = now; |
339 return TimeDelta::FromMilliseconds(now + rollover_ms); | 340 return TimeTicks() + TimeDelta::FromMilliseconds(now + g_rollover_ms); |
340 } | 341 } |
341 | 342 |
342 bool IsBuggyAthlon(const base::CPU& cpu) { | 343 // Discussion of tick counter options on Windows: |
343 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is | 344 // |
344 // unreliable. Fallback to low-res clock. | |
345 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
346 } | |
347 | |
348 // Overview of time counters: | |
349 // (1) CPU cycle counter. (Retrieved via RDTSC) | 345 // (1) CPU cycle counter. (Retrieved via RDTSC) |
350 // The CPU counter provides the highest resolution time stamp and is the least | 346 // The CPU counter provides the highest resolution time stamp and is the least |
351 // expensive to retrieve. However, the CPU counter is unreliable and should not | 347 // expensive to retrieve. However, the CPU counter is unreliable and should not |
352 // be used in production. Its biggest issue is that it is per processor and it | 348 // be used in production. Its biggest issue is that it is per processor and it |
353 // is not synchronized between processors. Also, on some computers, the counters | 349 // is not synchronized between processors. Also, on some computers, the counters |
354 // will change frequency due to thermal and power changes, and stop in some | 350 // will change frequency due to thermal and power changes, and stop in some |
355 // states. | 351 // states. |
356 // | 352 // |
357 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- | 353 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- |
358 // resolution (100 nanoseconds) time stamp but is comparatively more expensive | 354 // resolution (100 nanoseconds) time stamp but is comparatively more expensive |
359 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL. | 355 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL. |
360 // (with some help from ACPI). | 356 // (with some help from ACPI). |
361 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx | 357 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx |
362 // in the worst case, it gets the counter from the rollover interrupt on the | 358 // in the worst case, it gets the counter from the rollover interrupt on the |
363 // programmable interrupt timer. In best cases, the HAL may conclude that the | 359 // programmable interrupt timer. In best cases, the HAL may conclude that the |
364 // RDTSC counter runs at a constant frequency, then it uses that instead. On | 360 // RDTSC counter runs at a constant frequency, then it uses that instead. On |
365 // multiprocessor machines, it will try to verify the values returned from | 361 // multiprocessor machines, it will try to verify the values returned from |
366 // RDTSC on each processor are consistent with each other, and apply a handful | 362 // RDTSC on each processor are consistent with each other, and apply a handful |
cpu_(ooo_6.6-7.5)
2015/01/07 17:41:31
comment section 361 - 367 needs update?
miu
2015/01/07 19:20:18
Good idea. I'll update this discussion with all m
| |
367 // of workarounds for known buggy hardware. In other words, QPC is supposed to | 363 // of workarounds for known buggy hardware. In other words, QPC is supposed to |
368 // give consistent result on a multiprocessor computer, but it is unreliable in | 364 // give consistent result on a multiprocessor computer, but it is unreliable in |
369 // reality due to bugs in BIOS or HAL on some, especially old computers. | 365 // reality due to bugs in BIOS or HAL on some, especially old computers. |
370 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but | 366 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but |
371 // it should be used with caution. | 367 // it should be used with caution. |
372 // | 368 // |
373 // (3) System time. The system time provides a low-resolution (typically 10ms | 369 // (3) System time. The system time provides a low-resolution (typically 10ms |
374 // to 55 milliseconds) time stamp but is comparatively less expensive to | 370 // to 55 milliseconds) time stamp but is comparatively less expensive to |
cpu_(ooo_6.6-7.5)
2015/01/07 17:41:31
55 ?
miu
2015/01/07 19:20:17
Not sure where these numbers come from, but I'll u
| |
375 // retrieve and more reliable. | 371 // retrieve and more reliable. |
376 class HighResNowSingleton { | |
377 public: | |
378 HighResNowSingleton() | |
379 : ticks_per_second_(0), | |
380 skew_(0) { | |
381 | 372 |
382 base::CPU cpu; | 373 using NowFunction = TimeTicks (*)(void); |
383 if (IsBuggyAthlon(cpu)) | |
384 return; | |
385 | 374 |
386 // Synchronize the QPC clock with GetSystemTimeAsFileTime. | 375 TimeTicks InitialNowFunction(); |
387 LARGE_INTEGER ticks_per_sec = {0}; | 376 TimeTicks InitialSystemTraceNowFunction(); |
388 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
389 return; // QPC is not available. | |
390 ticks_per_second_ = ticks_per_sec.QuadPart; | |
391 | 377 |
392 skew_ = UnreliableNow() - ReliableNow(); | 378 // See "threading notes" in InitializeNowFunctionPointers() for details on how |
379 // concurrent reads/writes to these globals have been made safe. | |
380 NowFunction g_now_function = &InitialNowFunction; | |
381 NowFunction g_system_trace_now_function = &InitialSystemTraceNowFunction; | |
382 int64 g_qpc_ticks_per_second = 0; | |
383 | |
384 TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) { | |
brianderson
2015/01/07 19:53:11
@brucedawson makes a good point about needing to p
miu
2015/01/07 22:04:32
Done. I had considered this, but deemed it imposs
| |
385 DCHECK_GT(g_qpc_ticks_per_second, 0); | |
386 | |
387 // If the QPC Value is below the overflow threshold, we proceed with | |
388 // simple multiply and divide. | |
389 if (qpc_value < Time::kQPCOverflowThreshold) { | |
390 return TimeDelta::FromMicroseconds( | |
391 qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second); | |
392 } | |
393 // Otherwise, calculate microseconds in a roundabout manner to avoid | |
394 // overflow and precision issues. | |
395 int64 whole_seconds = qpc_value / g_qpc_ticks_per_second; | |
396 int64 leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second); | |
397 return TimeDelta::FromMicroseconds( | |
398 (whole_seconds * Time::kMicrosecondsPerSecond) + | |
399 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
400 g_qpc_ticks_per_second)); | |
401 } | |
402 | |
403 TimeTicks QPCNow() { | |
404 LARGE_INTEGER now; | |
405 QueryPerformanceCounter(&now); | |
406 return TimeTicks() + QPCValueToTimeDelta(now.QuadPart); | |
407 } | |
408 | |
409 bool IsBuggyAthlon(const base::CPU& cpu) { | |
410 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is unreliable. | |
411 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
412 } | |
413 | |
414 void InitializeNowFunctionPointers() { | |
415 LARGE_INTEGER ticks_per_sec = {0}; | |
416 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
417 ticks_per_sec.QuadPart = 0; | |
418 | |
419 // If Windows cannot provide a QPC implementation, both Now() and | |
420 // NowFromSystemTraceTime() must use the low-resolution clock. | |
421 // | |
422 // If the QPC implementation is expensive and/or unreliable, Now() will use | |
423 // the low-resolution clock, but NowFromSystemTraceTime() will use the QPC (in | |
424 // the hope that it is still useful for tracing purposes). A CPU lacking a | |
425 // non-stop time counter will cause Windows to provide an alternate QPC | |
426 // implementation that works, but is expensive to use. Certain Athlon CPUs are | |
427 // known to make the QPC implementation unreliable. | |
428 // | |
429 // Otherwise, both Now functions can use the high-resolution QPC clock. As of | |
430 // 4 January 2015, ~68% of users fall within this category. | |
431 NowFunction now_function; | |
432 NowFunction system_trace_now_function; | |
433 base::CPU cpu; | |
434 if (ticks_per_sec.QuadPart <= 0) { | |
435 now_function = system_trace_now_function = &RolloverProtectedNow; | |
436 } else if (!cpu.has_non_stop_time_stamp_counter() || IsBuggyAthlon(cpu)) { | |
437 now_function = &RolloverProtectedNow; | |
438 system_trace_now_function = &QPCNow; | |
439 } else { | |
440 now_function = system_trace_now_function = &QPCNow; | |
393 } | 441 } |
394 | 442 |
395 bool IsUsingHighResClock() { | 443 // Threading note 1: In an unlikely race condition, it's possible for two or |
396 return ticks_per_second_ != 0; | 444 // more threads to enter InitializeNowFunctionPointers() in parallel. This is |
397 } | 445 // not a problem since all threads should end up writing out the same values |
398 | 446 // to the global variables. |
399 TimeDelta Now() { | 447 // |
cpu_(ooo_6.6-7.5)
2015/01/07 17:41:31
remind me again why we don't simply initialize thi
miu
2015/01/07 19:20:17
We could. I was just sticking with what we had.
| |
400 if (IsUsingHighResClock()) | 448 // Threading note 2: The store to |g_qpc_ticks_per_second| must be complete |
401 return TimeDelta::FromMicroseconds(UnreliableNow()); | 449 // and visible to other threads before the new value for |g_now_function| |
402 | 450 // becomes visible to other threads. A memory barrier is used to guarantee |
403 // Just fallback to the slower clock. | 451 // this ordering. |
404 return RolloverProtectedNow(); | 452 g_qpc_ticks_per_second = ticks_per_sec.QuadPart; |
405 } | 453 base::subtle::MemoryBarrier(); |
406 | 454 g_now_function = now_function; |
407 int64 GetQPCDriftMicroseconds() { | 455 g_system_trace_now_function = system_trace_now_function; |
408 if (!IsUsingHighResClock()) | |
409 return 0; | |
410 return abs((UnreliableNow() - ReliableNow()) - skew_); | |
411 } | |
412 | |
413 int64 QPCValueToMicroseconds(LONGLONG qpc_value) { | |
414 if (!ticks_per_second_) | |
415 return 0; | |
416 // If the QPC Value is below the overflow threshold, we proceed with | |
417 // simple multiply and divide. | |
418 if (qpc_value < Time::kQPCOverflowThreshold) | |
419 return qpc_value * Time::kMicrosecondsPerSecond / ticks_per_second_; | |
420 // Otherwise, calculate microseconds in a round about manner to avoid | |
421 // overflow and precision issues. | |
422 int64 whole_seconds = qpc_value / ticks_per_second_; | |
423 int64 leftover_ticks = qpc_value - (whole_seconds * ticks_per_second_); | |
424 int64 microseconds = (whole_seconds * Time::kMicrosecondsPerSecond) + | |
425 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
426 ticks_per_second_); | |
427 return microseconds; | |
428 } | |
429 | |
430 private: | |
431 // Get the number of microseconds since boot in an unreliable fashion. | |
432 int64 UnreliableNow() { | |
433 LARGE_INTEGER now; | |
434 QueryPerformanceCounter(&now); | |
435 return QPCValueToMicroseconds(now.QuadPart); | |
436 } | |
437 | |
438 // Get the number of microseconds since boot in a reliable fashion. | |
439 int64 ReliableNow() { | |
440 return RolloverProtectedNow().InMicroseconds(); | |
441 } | |
442 | |
443 int64 ticks_per_second_; // 0 indicates QPF failed and we're broken. | |
444 int64 skew_; // Skew between lo-res and hi-res clocks (for debugging). | |
445 }; | |
446 | |
447 static base::LazyInstance<HighResNowSingleton>::Leaky | |
448 leaky_high_res_now_singleton = LAZY_INSTANCE_INITIALIZER; | |
449 | |
450 HighResNowSingleton* GetHighResNowSingleton() { | |
451 return leaky_high_res_now_singleton.Pointer(); | |
452 } | 456 } |
453 | 457 |
454 TimeDelta HighResNowWrapper() { | 458 TimeTicks InitialNowFunction() { |
455 return GetHighResNowSingleton()->Now(); | 459 InitializeNowFunctionPointers(); |
460 return g_now_function(); | |
456 } | 461 } |
457 | 462 |
458 typedef TimeDelta (*NowFunction)(void); | 463 TimeTicks InitialSystemTraceNowFunction() { |
459 | 464 InitializeNowFunctionPointers(); |
460 bool CPUReliablySupportsHighResTime() { | 465 return g_system_trace_now_function(); |
461 base::CPU cpu; | |
462 if (!cpu.has_non_stop_time_stamp_counter() || | |
463 !GetHighResNowSingleton()->IsUsingHighResClock()) | |
464 return false; | |
465 | |
466 if (IsBuggyAthlon(cpu)) | |
467 return false; | |
468 | |
469 return true; | |
470 } | |
471 | |
472 TimeDelta InitialNowFunction(); | |
473 | |
474 volatile NowFunction now_function = InitialNowFunction; | |
475 | |
476 TimeDelta InitialNowFunction() { | |
477 if (!CPUReliablySupportsHighResTime()) { | |
478 InterlockedExchangePointer( | |
479 reinterpret_cast<void* volatile*>(&now_function), | |
480 &RolloverProtectedNow); | |
481 return RolloverProtectedNow(); | |
482 } | |
483 InterlockedExchangePointer( | |
484 reinterpret_cast<void* volatile*>(&now_function), | |
485 &HighResNowWrapper); | |
486 return HighResNowWrapper(); | |
487 } | 466 } |
488 | 467 |
489 } // namespace | 468 } // namespace |
490 | 469 |
491 // static | 470 // static |
492 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( | 471 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( |
493 TickFunctionType ticker) { | 472 TickFunctionType ticker) { |
494 base::AutoLock locked(rollover_lock); | 473 base::AutoLock locked(g_rollover_lock); |
495 TickFunctionType old = tick_function; | 474 TickFunctionType old = g_tick_function; |
496 tick_function = ticker; | 475 g_tick_function = ticker; |
497 rollover_ms = 0; | 476 g_rollover_ms = 0; |
498 last_seen_now = 0; | 477 g_last_seen_now = 0; |
499 return old; | 478 return old; |
500 } | 479 } |
501 | 480 |
502 // static | 481 // static |
503 TimeTicks TimeTicks::Now() { | 482 TimeTicks TimeTicks::Now() { |
504 return TimeTicks() + now_function(); | 483 return g_now_function(); |
505 } | 484 } |
506 | 485 |
507 // static | 486 // static |
508 TimeTicks TimeTicks::HighResNow() { | 487 bool TimeTicks::IsHighResolution() { |
509 return TimeTicks() + HighResNowWrapper(); | 488 if (g_now_function == &InitialNowFunction) |
489 InitializeNowFunctionPointers(); | |
490 return g_now_function == &QPCNow; | |
510 } | 491 } |
511 | 492 |
512 // static | 493 // static |
513 bool TimeTicks::IsHighResNowFastAndReliable() { | |
514 return CPUReliablySupportsHighResTime(); | |
515 } | |
516 | |
517 // static | |
518 TimeTicks TimeTicks::ThreadNow() { | 494 TimeTicks TimeTicks::ThreadNow() { |
519 NOTREACHED(); | 495 NOTREACHED(); |
520 return TimeTicks(); | 496 return TimeTicks(); |
521 } | 497 } |
522 | 498 |
523 // static | 499 // static |
524 TimeTicks TimeTicks::NowFromSystemTraceTime() { | 500 TimeTicks TimeTicks::NowFromSystemTraceTime() { |
525 return HighResNow(); | 501 return g_system_trace_now_function(); |
526 } | |
527 | |
528 // static | |
529 int64 TimeTicks::GetQPCDriftMicroseconds() { | |
530 return GetHighResNowSingleton()->GetQPCDriftMicroseconds(); | |
531 } | 502 } |
532 | 503 |
533 // static | 504 // static |
534 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { | 505 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { |
535 return TimeTicks(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 506 return TimeTicks() + QPCValueToTimeDelta(qpc_value); |
536 } | |
537 | |
538 // static | |
539 bool TimeTicks::IsHighResClockWorking() { | |
540 return GetHighResNowSingleton()->IsUsingHighResClock(); | |
541 } | 507 } |
542 | 508 |
543 // TimeDelta ------------------------------------------------------------------ | 509 // TimeDelta ------------------------------------------------------------------ |
544 | 510 |
545 // static | 511 // static |
546 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { | 512 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { |
547 return TimeDelta(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 513 return QPCValueToTimeDelta(qpc_value); |
548 } | 514 } |
OLD | NEW |