OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 // Windows Timer Primer | 6 // Windows Timer Primer |
7 // | 7 // |
8 // A good article: http://www.ddj.com/windows/184416651 | 8 // A good article: http://www.ddj.com/windows/184416651 |
9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 | 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 |
10 // | 10 // |
(...skipping 19 matching lines...) Expand all Loading... | |
30 // To work around all this, we're going to generally use timeGetTime(). We | 30 // To work around all this, we're going to generally use timeGetTime(). We |
31 // will only increase the system-wide timer if we're not running on battery | 31 // will only increase the system-wide timer if we're not running on battery |
32 // power. | 32 // power. |
33 | 33 |
34 #include "base/time/time.h" | 34 #include "base/time/time.h" |
35 | 35 |
36 #pragma comment(lib, "winmm.lib") | 36 #pragma comment(lib, "winmm.lib") |
37 #include <windows.h> | 37 #include <windows.h> |
38 #include <mmsystem.h> | 38 #include <mmsystem.h> |
39 | 39 |
40 #include "base/atomicops.h" | |
40 #include "base/basictypes.h" | 41 #include "base/basictypes.h" |
41 #include "base/cpu.h" | 42 #include "base/cpu.h" |
42 #include "base/lazy_instance.h" | 43 #include "base/lazy_instance.h" |
43 #include "base/logging.h" | 44 #include "base/logging.h" |
44 #include "base/synchronization/lock.h" | 45 #include "base/synchronization/lock.h" |
45 | 46 |
46 using base::Time; | 47 using base::Time; |
47 using base::TimeDelta; | 48 using base::TimeDelta; |
48 using base::TimeTicks; | 49 using base::TimeTicks; |
49 | 50 |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
300 // TimeTicks ------------------------------------------------------------------ | 301 // TimeTicks ------------------------------------------------------------------ |
301 namespace { | 302 namespace { |
302 | 303 |
303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the | 304 // We define a wrapper to adapt between the __stdcall and __cdecl call of the |
304 // mock function, and to avoid a static constructor. Assigning an import to a | 305 // mock function, and to avoid a static constructor. Assigning an import to a |
305 // function pointer directly would require setup code to fetch from the IAT. | 306 // function pointer directly would require setup code to fetch from the IAT. |
306 DWORD timeGetTimeWrapper() { | 307 DWORD timeGetTimeWrapper() { |
307 return timeGetTime(); | 308 return timeGetTime(); |
308 } | 309 } |
309 | 310 |
310 DWORD (*tick_function)(void) = &timeGetTimeWrapper; | 311 DWORD (*g_tick_function)(void) = &timeGetTimeWrapper; |
311 | 312 |
312 // Accumulation of time lost due to rollover (in milliseconds). | 313 // Accumulation of time lost due to rollover (in milliseconds). |
313 int64 rollover_ms = 0; | 314 int64 g_rollover_ms = 0; |
314 | 315 |
315 // The last timeGetTime value we saw, to detect rollover. | 316 // The last timeGetTime value we saw, to detect rollover. |
316 DWORD last_seen_now = 0; | 317 DWORD g_last_seen_now = 0; |
317 | 318 |
318 // Lock protecting rollover_ms and last_seen_now. | 319 // Lock protecting g_rollover_ms and g_last_seen_now. |
319 // Note: this is a global object, and we usually avoid these. However, the time | 320 // Note: this is a global object, and we usually avoid these. However, the time |
320 // code is low-level, and we don't want to use Singletons here (it would be too | 321 // code is low-level, and we don't want to use Singletons here (it would be too |
321 // easy to use a Singleton without even knowing it, and that may lead to many | 322 // easy to use a Singleton without even knowing it, and that may lead to many |
322 // gotchas). Its impact on startup time should be negligible due to low-level | 323 // gotchas). Its impact on startup time should be negligible due to low-level |
323 // nature of time code. | 324 // nature of time code. |
324 base::Lock rollover_lock; | 325 base::Lock g_rollover_lock; |
325 | 326 |
326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic | 327 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic |
327 // because it returns the number of milliseconds since Windows has started, | 328 // because it returns the number of milliseconds since Windows has started, |
328 // which will roll over the 32-bit value every ~49 days. We try to track | 329 // which will roll over the 32-bit value every ~49 days. We try to track |
329 // rollover ourselves, which works if TimeTicks::Now() is called at least every | 330 // rollover ourselves, which works if TimeTicks::Now() is called at least every |
330 // 49 days. | 331 // 49 days. |
331 TimeDelta RolloverProtectedNow() { | 332 TimeTicks RolloverProtectedNow() { |
332 base::AutoLock locked(rollover_lock); | 333 base::AutoLock locked(g_rollover_lock); |
333 // We should hold the lock while calling tick_function to make sure that | 334 // We should hold the lock while calling g_tick_function to make sure that |
334 // we keep last_seen_now correctly in sync. | 335 // we keep g_last_seen_now correctly in sync. |
335 DWORD now = tick_function(); | 336 DWORD now = g_tick_function(); |
336 if (now < last_seen_now) | 337 if (now < g_last_seen_now) |
337 rollover_ms += 0x100000000I64; // ~49.7 days. | 338 g_rollover_ms += 0x100000000I64; // ~49.7 days. |
338 last_seen_now = now; | 339 g_last_seen_now = now; |
339 return TimeDelta::FromMilliseconds(now + rollover_ms); | 340 return TimeTicks() + TimeDelta::FromMilliseconds(now + g_rollover_ms); |
340 } | 341 } |
341 | 342 |
342 bool IsBuggyAthlon(const base::CPU& cpu) { | 343 // Discussion of tick counter options on Windows: |
343 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is | 344 // |
344 // unreliable. Fallback to low-res clock. | |
345 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
346 } | |
347 | |
348 // Overview of time counters: | |
349 // (1) CPU cycle counter. (Retrieved via RDTSC) | 345 // (1) CPU cycle counter. (Retrieved via RDTSC) |
350 // The CPU counter provides the highest resolution time stamp and is the least | 346 // The CPU counter provides the highest resolution time stamp and is the least |
351 // expensive to retrieve. However, the CPU counter is unreliable and should not | 347 // expensive to retrieve. However, on older CPUs, two issues can affect its |
352 // be used in production. Its biggest issue is that it is per processor and it | 348 // reliability: First it is maintained per processor and not synchronized |
353 // is not synchronized between processors. Also, on some computers, the counters | 349 // between processors. Also, the counters will change frequency due to thermal |
354 // will change frequency due to thermal and power changes, and stop in some | 350 // and power changes, and stop in some states. |
355 // states. | |
356 // | 351 // |
357 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- | 352 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- |
358 // resolution (100 nanoseconds) time stamp but is comparatively more expensive | 353 // resolution (<1 microsecond) time stamp. On most hardware running today, it |
359 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL. | 354 // auto-detects and uses the constant-rate RDTSC counter to provide extremely |
360 // (with some help from ACPI). | 355 // efficient and reliable time stamps. |
361 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx | 356 // |
362 // in the worst case, it gets the counter from the rollover interrupt on the | 357 // On older CPUs where RDTSC is unreliable, it falls back to using more |
358 // expensive (20X to 40X more costly) alternate clocks, such as HPET or the ACPI | |
359 // PM timer, and can involve system calls; and all this is up to the HAL (with | |
360 // some help from ACPI). According to | |
361 // http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx, in the | |
362 // worst case, it gets the counter from the rollover interrupt on the | |
363 // programmable interrupt timer. In best cases, the HAL may conclude that the | 363 // programmable interrupt timer. In best cases, the HAL may conclude that the |
364 // RDTSC counter runs at a constant frequency, then it uses that instead. On | 364 // RDTSC counter runs at a constant frequency, then it uses that instead. On |
365 // multiprocessor machines, it will try to verify the values returned from | 365 // multiprocessor machines, it will try to verify the values returned from |
366 // RDTSC on each processor are consistent with each other, and apply a handful | 366 // RDTSC on each processor are consistent with each other, and apply a handful |
367 // of workarounds for known buggy hardware. In other words, QPC is supposed to | 367 // of workarounds for known buggy hardware. In other words, QPC is supposed to |
368 // give consistent result on a multiprocessor computer, but it is unreliable in | 368 // give consistent results on a multiprocessor computer, but for older CPUs it |
369 // reality due to bugs in BIOS or HAL on some, especially old computers. | 369 // can be unreliable due to bugs in BIOS or HAL. |
370 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but | |
371 // it should be used with caution. | |
372 // | 370 // |
373 // (3) System time. The system time provides a low-resolution (typically 10ms | 371 // (3) System time. The system time provides a low-resolution (from ~1 to ~15.6 |
374 // to 55 milliseconds) time stamp but is comparatively less expensive to | 372 // milliseconds) time stamp but is comparatively less expensive to retrieve and |
375 // retrieve and more reliable. | 373 // more reliable. Time::EnableHighResolutionTimer() and |
376 class HighResNowSingleton { | 374 // Time::ActivateHighResolutionTimer() can be called to alter the resolution of |
377 public: | 375 // this timer; and also other Windows applications can alter it, affecting this |
378 HighResNowSingleton() | 376 // one. |
379 : ticks_per_second_(0), | |
380 skew_(0) { | |
381 | 377 |
382 base::CPU cpu; | 378 using NowFunction = TimeTicks (*)(void); |
383 if (IsBuggyAthlon(cpu)) | |
384 return; | |
385 | 379 |
386 // Synchronize the QPC clock with GetSystemTimeAsFileTime. | 380 TimeTicks InitialNowFunction(); |
387 LARGE_INTEGER ticks_per_sec = {0}; | 381 TimeTicks InitialSystemTraceNowFunction(); |
388 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
389 return; // QPC is not available. | |
390 ticks_per_second_ = ticks_per_sec.QuadPart; | |
391 | 382 |
392 skew_ = UnreliableNow() - ReliableNow(); | 383 // See "threading notes" in InitializeNowFunctionPointers() for details on how |
384 // concurrent reads/writes to these globals have been made safe. | |
385 NowFunction g_now_function = &InitialNowFunction; | |
386 NowFunction g_system_trace_now_function = &InitialSystemTraceNowFunction; | |
387 int64 g_qpc_ticks_per_second = 0; | |
388 | |
389 TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) { | |
390 // Ensure the memory store to |g_qpc_ticks_per_second|, made in | |
391 // InitializeNowFunctionPointers(), is visible to the current thread. | |
392 base::subtle::MemoryBarrier(); | |
brucedawson
2015/01/07 22:29:13
Well darn. I got what I requested and now I'm sad.
miu
2015/01/08 00:36:39
Done. Since our Windows toolchain was recently up
| |
393 | |
394 DCHECK_GT(g_qpc_ticks_per_second, 0); | |
395 | |
396 // If the QPC Value is below the overflow threshold, we proceed with | |
397 // simple multiply and divide. | |
398 if (qpc_value < Time::kQPCOverflowThreshold) { | |
399 return TimeDelta::FromMicroseconds( | |
400 qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second); | |
401 } | |
402 // Otherwise, calculate microseconds in a roundabout manner to avoid | |
403 // overflow and precision issues. | |
404 int64 whole_seconds = qpc_value / g_qpc_ticks_per_second; | |
405 int64 leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second); | |
406 return TimeDelta::FromMicroseconds( | |
407 (whole_seconds * Time::kMicrosecondsPerSecond) + | |
408 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
409 g_qpc_ticks_per_second)); | |
410 } | |
411 | |
412 TimeTicks QPCNow() { | |
413 LARGE_INTEGER now; | |
414 QueryPerformanceCounter(&now); | |
415 return TimeTicks() + QPCValueToTimeDelta(now.QuadPart); | |
416 } | |
417 | |
418 bool IsBuggyAthlon(const base::CPU& cpu) { | |
419 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is unreliable. | |
420 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
421 } | |
422 | |
423 void InitializeNowFunctionPointers() { | |
424 LARGE_INTEGER ticks_per_sec = {0}; | |
425 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
426 ticks_per_sec.QuadPart = 0; | |
427 | |
428 // If Windows cannot provide a QPC implementation, both Now() and | |
429 // NowFromSystemTraceTime() must use the low-resolution clock. | |
430 // | |
431 // If the QPC implementation is expensive and/or unreliable, Now() will use | |
432 // the low-resolution clock, but NowFromSystemTraceTime() will use the QPC (in | |
433 // the hope that it is still useful for tracing purposes). A CPU lacking a | |
434 // non-stop time counter will cause Windows to provide an alternate QPC | |
435 // implementation that works, but is expensive to use. Certain Athlon CPUs are | |
436 // known to make the QPC implementation unreliable. | |
437 // | |
438 // Otherwise, both Now functions can use the high-resolution QPC clock. As of | |
439 // 4 January 2015, ~68% of users fall within this category. | |
440 NowFunction now_function; | |
441 NowFunction system_trace_now_function; | |
442 base::CPU cpu; | |
443 if (ticks_per_sec.QuadPart <= 0) { | |
444 now_function = system_trace_now_function = &RolloverProtectedNow; | |
445 } else if (!cpu.has_non_stop_time_stamp_counter() || IsBuggyAthlon(cpu)) { | |
446 now_function = &RolloverProtectedNow; | |
447 system_trace_now_function = &QPCNow; | |
448 } else { | |
449 now_function = system_trace_now_function = &QPCNow; | |
393 } | 450 } |
394 | 451 |
395 bool IsUsingHighResClock() { | 452 // Threading note 1: In an unlikely race condition, it's possible for two or |
396 return ticks_per_second_ != 0; | 453 // more threads to enter InitializeNowFunctionPointers() in parallel. This is |
397 } | 454 // not a problem since all threads should end up writing out the same values |
398 | 455 // to the global variables. |
399 TimeDelta Now() { | 456 // |
400 if (IsUsingHighResClock()) | 457 // Threading note 2: The memory store to |g_qpc_ticks_per_second| must be |
401 return TimeDelta::FromMicroseconds(UnreliableNow()); | 458 // complete and visible to other threads before the new value for |
402 | 459 // |g_now_function| becomes visible to other threads. Memory barriers are used |
403 // Just fallback to the slower clock. | 460 // here and before |g_qpc_ticks_per_second| is read in QPCValueToTimeDelta() |
404 return RolloverProtectedNow(); | 461 // to guarantee this ordering. |
405 } | 462 g_qpc_ticks_per_second = ticks_per_sec.QuadPart; |
406 | 463 base::subtle::MemoryBarrier(); |
407 int64 GetQPCDriftMicroseconds() { | 464 g_now_function = now_function; |
408 if (!IsUsingHighResClock()) | 465 g_system_trace_now_function = system_trace_now_function; |
409 return 0; | |
410 return abs((UnreliableNow() - ReliableNow()) - skew_); | |
411 } | |
412 | |
413 int64 QPCValueToMicroseconds(LONGLONG qpc_value) { | |
414 if (!ticks_per_second_) | |
415 return 0; | |
416 // If the QPC Value is below the overflow threshold, we proceed with | |
417 // simple multiply and divide. | |
418 if (qpc_value < Time::kQPCOverflowThreshold) | |
419 return qpc_value * Time::kMicrosecondsPerSecond / ticks_per_second_; | |
420 // Otherwise, calculate microseconds in a round about manner to avoid | |
421 // overflow and precision issues. | |
422 int64 whole_seconds = qpc_value / ticks_per_second_; | |
423 int64 leftover_ticks = qpc_value - (whole_seconds * ticks_per_second_); | |
424 int64 microseconds = (whole_seconds * Time::kMicrosecondsPerSecond) + | |
425 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
426 ticks_per_second_); | |
427 return microseconds; | |
428 } | |
429 | |
430 private: | |
431 // Get the number of microseconds since boot in an unreliable fashion. | |
432 int64 UnreliableNow() { | |
433 LARGE_INTEGER now; | |
434 QueryPerformanceCounter(&now); | |
435 return QPCValueToMicroseconds(now.QuadPart); | |
436 } | |
437 | |
438 // Get the number of microseconds since boot in a reliable fashion. | |
439 int64 ReliableNow() { | |
440 return RolloverProtectedNow().InMicroseconds(); | |
441 } | |
442 | |
443 int64 ticks_per_second_; // 0 indicates QPF failed and we're broken. | |
444 int64 skew_; // Skew between lo-res and hi-res clocks (for debugging). | |
445 }; | |
446 | |
447 static base::LazyInstance<HighResNowSingleton>::Leaky | |
448 leaky_high_res_now_singleton = LAZY_INSTANCE_INITIALIZER; | |
449 | |
450 HighResNowSingleton* GetHighResNowSingleton() { | |
451 return leaky_high_res_now_singleton.Pointer(); | |
452 } | 466 } |
453 | 467 |
454 TimeDelta HighResNowWrapper() { | 468 TimeTicks InitialNowFunction() { |
455 return GetHighResNowSingleton()->Now(); | 469 InitializeNowFunctionPointers(); |
470 return g_now_function(); | |
456 } | 471 } |
457 | 472 |
458 typedef TimeDelta (*NowFunction)(void); | 473 TimeTicks InitialSystemTraceNowFunction() { |
459 | 474 InitializeNowFunctionPointers(); |
460 bool CPUReliablySupportsHighResTime() { | 475 return g_system_trace_now_function(); |
461 base::CPU cpu; | |
462 if (!cpu.has_non_stop_time_stamp_counter() || | |
463 !GetHighResNowSingleton()->IsUsingHighResClock()) | |
464 return false; | |
465 | |
466 if (IsBuggyAthlon(cpu)) | |
467 return false; | |
468 | |
469 return true; | |
470 } | |
471 | |
472 TimeDelta InitialNowFunction(); | |
473 | |
474 volatile NowFunction now_function = InitialNowFunction; | |
475 | |
476 TimeDelta InitialNowFunction() { | |
477 if (!CPUReliablySupportsHighResTime()) { | |
478 InterlockedExchangePointer( | |
479 reinterpret_cast<void* volatile*>(&now_function), | |
480 &RolloverProtectedNow); | |
481 return RolloverProtectedNow(); | |
482 } | |
483 InterlockedExchangePointer( | |
484 reinterpret_cast<void* volatile*>(&now_function), | |
485 &HighResNowWrapper); | |
486 return HighResNowWrapper(); | |
487 } | 476 } |
488 | 477 |
489 } // namespace | 478 } // namespace |
490 | 479 |
491 // static | 480 // static |
492 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( | 481 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( |
493 TickFunctionType ticker) { | 482 TickFunctionType ticker) { |
494 base::AutoLock locked(rollover_lock); | 483 base::AutoLock locked(g_rollover_lock); |
495 TickFunctionType old = tick_function; | 484 TickFunctionType old = g_tick_function; |
496 tick_function = ticker; | 485 g_tick_function = ticker; |
497 rollover_ms = 0; | 486 g_rollover_ms = 0; |
498 last_seen_now = 0; | 487 g_last_seen_now = 0; |
499 return old; | 488 return old; |
500 } | 489 } |
501 | 490 |
502 // static | 491 // static |
503 TimeTicks TimeTicks::Now() { | 492 TimeTicks TimeTicks::Now() { |
504 return TimeTicks() + now_function(); | 493 return g_now_function(); |
505 } | 494 } |
506 | 495 |
507 // static | 496 // static |
508 TimeTicks TimeTicks::HighResNow() { | 497 bool TimeTicks::IsHighResolution() { |
509 return TimeTicks() + HighResNowWrapper(); | 498 if (g_now_function == &InitialNowFunction) |
499 InitializeNowFunctionPointers(); | |
500 return g_now_function == &QPCNow; | |
510 } | 501 } |
511 | 502 |
512 // static | 503 // static |
513 bool TimeTicks::IsHighResNowFastAndReliable() { | |
514 return CPUReliablySupportsHighResTime(); | |
515 } | |
516 | |
517 // static | |
518 TimeTicks TimeTicks::ThreadNow() { | 504 TimeTicks TimeTicks::ThreadNow() { |
519 NOTREACHED(); | 505 NOTREACHED(); |
520 return TimeTicks(); | 506 return TimeTicks(); |
521 } | 507 } |
522 | 508 |
523 // static | 509 // static |
524 TimeTicks TimeTicks::NowFromSystemTraceTime() { | 510 TimeTicks TimeTicks::NowFromSystemTraceTime() { |
525 return HighResNow(); | 511 return g_system_trace_now_function(); |
526 } | |
527 | |
528 // static | |
529 int64 TimeTicks::GetQPCDriftMicroseconds() { | |
530 return GetHighResNowSingleton()->GetQPCDriftMicroseconds(); | |
531 } | 512 } |
532 | 513 |
533 // static | 514 // static |
534 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { | 515 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { |
535 return TimeTicks(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 516 return TimeTicks() + QPCValueToTimeDelta(qpc_value); |
536 } | |
537 | |
538 // static | |
539 bool TimeTicks::IsHighResClockWorking() { | |
540 return GetHighResNowSingleton()->IsUsingHighResClock(); | |
541 } | 517 } |
542 | 518 |
543 // TimeDelta ------------------------------------------------------------------ | 519 // TimeDelta ------------------------------------------------------------------ |
544 | 520 |
545 // static | 521 // static |
546 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { | 522 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { |
547 return TimeDelta(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 523 return QPCValueToTimeDelta(qpc_value); |
548 } | 524 } |
OLD | NEW |