OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 // Windows Timer Primer | 6 // Windows Timer Primer |
7 // | 7 // |
8 // A good article: http://www.ddj.com/windows/184416651 | 8 // A good article: http://www.ddj.com/windows/184416651 |
9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 | 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 |
10 // | 10 // |
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
300 // TimeTicks ------------------------------------------------------------------ | 300 // TimeTicks ------------------------------------------------------------------ |
301 namespace { | 301 namespace { |
302 | 302 |
303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the | 303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the |
304 // mock function, and to avoid a static constructor. Assigning an import to a | 304 // mock function, and to avoid a static constructor. Assigning an import to a |
305 // function pointer directly would require setup code to fetch from the IAT. | 305 // function pointer directly would require setup code to fetch from the IAT. |
306 DWORD timeGetTimeWrapper() { | 306 DWORD timeGetTimeWrapper() { |
307 return timeGetTime(); | 307 return timeGetTime(); |
308 } | 308 } |
309 | 309 |
310 DWORD (*tick_function)(void) = &timeGetTimeWrapper; | 310 DWORD (*g_tick_function)(void) = &timeGetTimeWrapper; |
311 | 311 |
312 // Accumulation of time lost due to rollover (in milliseconds). | 312 // Accumulation of time lost due to rollover (in milliseconds). |
313 int64 rollover_ms = 0; | 313 int64 g_rollover_ms = 0; |
314 | 314 |
315 // The last timeGetTime value we saw, to detect rollover. | 315 // The last timeGetTime value we saw, to detect rollover. |
316 DWORD last_seen_now = 0; | 316 DWORD g_last_seen_now = 0; |
317 | 317 |
318 // Lock protecting rollover_ms and last_seen_now. | 318 // Lock protecting g_rollover_ms and g_last_seen_now. |
319 // Note: this is a global object, and we usually avoid these. However, the time | 319 // Note: this is a global object, and we usually avoid these. However, the time |
320 // code is low-level, and we don't want to use Singletons here (it would be too | 320 // code is low-level, and we don't want to use Singletons here (it would be too |
321 // easy to use a Singleton without even knowing it, and that may lead to many | 321 // easy to use a Singleton without even knowing it, and that may lead to many |
322 // gotchas). Its impact on startup time should be negligible due to low-level | 322 // gotchas). Its impact on startup time should be negligible due to low-level |
323 // nature of time code. | 323 // nature of time code. |
324 base::Lock rollover_lock; | 324 base::Lock g_rollover_lock; |
325 | 325 |
326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic | 326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic |
327 // because it returns the number of milliseconds since Windows has started, | 327 // because it returns the number of milliseconds since Windows has started, |
328 // which will roll over the 32-bit value every ~49 days. We try to track | 328 // which will roll over the 32-bit value every ~49 days. We try to track |
329 // rollover ourselves, which works if TimeTicks::Now() is called at least every | 329 // rollover ourselves, which works if TimeTicks::Now() is called at least every |
330 // 49 days. | 330 // 49 days. |
331 TimeDelta RolloverProtectedNow() { | 331 TimeTicks RolloverProtectedNow() { |
332 base::AutoLock locked(rollover_lock); | 332 base::AutoLock locked(g_rollover_lock); |
333 // We should hold the lock while calling tick_function to make sure that | 333 // We should hold the lock while calling g_tick_function to make sure that |
334 // we keep last_seen_now stay correctly in sync. | 334 // g_last_seen_now stays correctly in sync. |
335 DWORD now = tick_function(); | 335 DWORD now = g_tick_function(); |
336 if (now < last_seen_now) | 336 if (now < g_last_seen_now) |
337 rollover_ms += 0x100000000I64; // ~49.7 days. | 337 g_rollover_ms += 0x100000000I64; // ~49.7 days. |
338 last_seen_now = now; | 338 g_last_seen_now = now; |
339 return TimeDelta::FromMilliseconds(now + rollover_ms); | 339 return TimeTicks() + TimeDelta::FromMilliseconds(now + g_rollover_ms); |
340 } | 340 } |
341 | 341 |
342 bool IsBuggyAthlon(const base::CPU& cpu) { | 342 // Discussion of tick counter options on Windows: |
343 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is | 343 // |
344 // unreliable. Fallback to low-res clock. | |
345 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; | |
346 } | |
347 | |
348 // Overview of time counters: | |
349 // (1) CPU cycle counter. (Retrieved via RDTSC) | 344 // (1) CPU cycle counter. (Retrieved via RDTSC) |
350 // The CPU counter provides the highest resolution time stamp and is the least | 345 // The CPU counter provides the highest resolution time stamp and is the least |
351 // expensive to retrieve. However, the CPU counter is unreliable and should not | 346 // expensive to retrieve. However, on older CPUs, two issues can affect its |
352 // be used in production. Its biggest issue is that it is per processor and it | 347 // reliability: First it is maintained per processor and not synchronized |
353 // is not synchronized between processors. Also, on some computers, the counters | 348 // between processors. Also, the counters will change frequency due to thermal |
354 // will change frequency due to thermal and power changes, and stop in some | 349 // and power changes, and stop in some states. |
355 // states. | |
356 // | 350 // |
357 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- | 351 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- |
358 // resolution (100 nanoseconds) time stamp but is comparatively more expensive | 352 // resolution (<1 microsecond) time stamp. On most hardware running today, it |
359 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL. | 353 // auto-detects and uses the constant-rate RDTSC counter to provide extremely |
360 // (with some help from ACPI). | 354 // efficient and reliable time stamps. |
361 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx | 355 // |
362 // in the worst case, it gets the counter from the rollover interrupt on the | 356 // On older CPUs where RDTSC is unreliable, it falls back to using more |
| 357 // expensive (20X to 40X more costly) alternate clocks, such as HPET or the ACPI |
| 358 // PM timer, and can involve system calls; and all this is up to the HAL (with |
| 359 // some help from ACPI). According to |
| 360 // http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx, in the |
| 361 // worst case, it gets the counter from the rollover interrupt on the |
363 // programmable interrupt timer. In best cases, the HAL may conclude that the | 362 // programmable interrupt timer. In best cases, the HAL may conclude that the |
364 // RDTSC counter runs at a constant frequency, then it uses that instead. On | 363 // RDTSC counter runs at a constant frequency, then it uses that instead. On |
365 // multiprocessor machines, it will try to verify the values returned from | 364 // multiprocessor machines, it will try to verify the values returned from |
366 // RDTSC on each processor are consistent with each other, and apply a handful | 365 // RDTSC on each processor are consistent with each other, and apply a handful |
367 // of workarounds for known buggy hardware. In other words, QPC is supposed to | 366 // of workarounds for known buggy hardware. In other words, QPC is supposed to |
368 // give consistent result on a multiprocessor computer, but it is unreliable in | 367 // give consistent results on a multiprocessor computer, but for older CPUs it |
369 // reality due to bugs in BIOS or HAL on some, especially old computers. | 368 // can be unreliable due to bugs in BIOS or HAL. |
370 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but | |
371 // it should be used with caution. | |
372 // | 369 // |
373 // (3) System time. The system time provides a low-resolution (typically 10ms | 370 // (3) System time. The system time provides a low-resolution (from ~1 to ~15.6 |
374 // to 55 milliseconds) time stamp but is comparatively less expensive to | 371 // milliseconds) time stamp but is comparatively less expensive to retrieve and |
375 // retrieve and more reliable. | 372 // more reliable. Time::EnableHighResolutionTimer() and |
376 class HighResNowSingleton { | 373 // Time::ActivateHighResolutionTimer() can be called to alter the resolution of |
377 public: | 374 // this timer; and also other Windows applications can alter it, affecting this |
378 HighResNowSingleton() | 375 // one. |
379 : ticks_per_second_(0), | |
380 skew_(0) { | |
381 | 376 |
382 base::CPU cpu; | 377 using NowFunction = TimeTicks (*)(void); |
383 if (IsBuggyAthlon(cpu)) | |
384 return; | |
385 | 378 |
386 // Synchronize the QPC clock with GetSystemTimeAsFileTime. | 379 TimeTicks InitialNowFunction(); |
387 LARGE_INTEGER ticks_per_sec = {0}; | 380 TimeTicks InitialSystemTraceNowFunction(); |
388 if (!QueryPerformanceFrequency(&ticks_per_sec)) | |
389 return; // QPC is not available. | |
390 ticks_per_second_ = ticks_per_sec.QuadPart; | |
391 | 381 |
392 skew_ = UnreliableNow() - ReliableNow(); | 382 // See "threading notes" in InitializeNowFunctionPointers() for details on how |
| 383 // concurrent reads/writes to these globals have been made safe. |
| 384 NowFunction g_now_function = &InitialNowFunction; |
| 385 NowFunction g_system_trace_now_function = &InitialSystemTraceNowFunction; |
| 386 int64 g_qpc_ticks_per_second = 0; |
| 387 |
| 388 // As of January 2015, use of <atomic> is forbidden in Chromium code. This is |
| 389 // what std::atomic_thread_fence does on Windows on all Intel architectures when |
| 390 // the memory_order argument is anything but std::memory_order_seq_cst: |
| 391 #define ATOMIC_THREAD_FENCE(memory_order) _ReadWriteBarrier(); |
| 392 |
| 393 TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) { |
| 394 // Ensure that the assignment to |g_qpc_ticks_per_second|, made in |
| 395 // InitializeNowFunctionPointers(), has happened by this point. |
| 396 ATOMIC_THREAD_FENCE(memory_order_acquire); |
| 397 |
| 398 DCHECK_GT(g_qpc_ticks_per_second, 0); |
| 399 |
| 400 // If the QPC Value is below the overflow threshold, we proceed with |
| 401 // simple multiply and divide. |
| 402 if (qpc_value < Time::kQPCOverflowThreshold) { |
| 403 return TimeDelta::FromMicroseconds( |
| 404 qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second); |
| 405 } |
| 406 // Otherwise, calculate microseconds in a roundabout manner to avoid |
| 407 // overflow and precision issues. |
| 408 int64 whole_seconds = qpc_value / g_qpc_ticks_per_second; |
| 409 int64 leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second); |
| 410 return TimeDelta::FromMicroseconds( |
| 411 (whole_seconds * Time::kMicrosecondsPerSecond) + |
| 412 ((leftover_ticks * Time::kMicrosecondsPerSecond) / |
| 413 g_qpc_ticks_per_second)); |
| 414 } |
| 415 |
| 416 TimeTicks QPCNow() { |
| 417 LARGE_INTEGER now; |
| 418 QueryPerformanceCounter(&now); |
| 419 return TimeTicks() + QPCValueToTimeDelta(now.QuadPart); |
| 420 } |
| 421 |
| 422 bool IsBuggyAthlon(const base::CPU& cpu) { |
| 423 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is unreliable. |
| 424 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15; |
| 425 } |
| 426 |
| 427 void InitializeNowFunctionPointers() { |
| 428 LARGE_INTEGER ticks_per_sec = {0}; |
| 429 if (!QueryPerformanceFrequency(&ticks_per_sec)) |
| 430 ticks_per_sec.QuadPart = 0; |
| 431 |
| 432 // If Windows cannot provide a QPC implementation, both Now() and |
| 433 // NowFromSystemTraceTime() must use the low-resolution clock. |
| 434 // |
| 435 // If the QPC implementation is expensive and/or unreliable, Now() will use |
| 436 // the low-resolution clock, but NowFromSystemTraceTime() will use the QPC (in |
| 437 // the hope that it is still useful for tracing purposes). A CPU lacking a |
| 438 // non-stop time counter will cause Windows to provide an alternate QPC |
| 439 // implementation that works, but is expensive to use. Certain Athlon CPUs are |
| 440 // known to make the QPC implementation unreliable. |
| 441 // |
| 442 // Otherwise, both Now functions can use the high-resolution QPC clock. As of |
| 443 // 4 January 2015, ~68% of users fall within this category. |
| 444 NowFunction now_function; |
| 445 NowFunction system_trace_now_function; |
| 446 base::CPU cpu; |
| 447 if (ticks_per_sec.QuadPart <= 0) { |
| 448 now_function = system_trace_now_function = &RolloverProtectedNow; |
| 449 } else if (!cpu.has_non_stop_time_stamp_counter() || IsBuggyAthlon(cpu)) { |
| 450 now_function = &RolloverProtectedNow; |
| 451 system_trace_now_function = &QPCNow; |
| 452 } else { |
| 453 now_function = system_trace_now_function = &QPCNow; |
393 } | 454 } |
394 | 455 |
395 bool IsUsingHighResClock() { | 456 // Threading note 1: In an unlikely race condition, it's possible for two or |
396 return ticks_per_second_ != 0; | 457 // more threads to enter InitializeNowFunctionPointers() in parallel. This is |
397 } | 458 // not a problem since all threads should end up writing out the same values |
398 | 459 // to the global variables. |
399 TimeDelta Now() { | 460 // |
400 if (IsUsingHighResClock()) | 461 // Threading note 2: A release fence is placed here to ensure, from the |
401 return TimeDelta::FromMicroseconds(UnreliableNow()); | 462 // perspective of other threads using the function pointers, that the |
402 | 463 // assignment to |g_qpc_ticks_per_second| happens before the function pointers |
403 // Just fallback to the slower clock. | 464 // are changed. |
404 return RolloverProtectedNow(); | 465 g_qpc_ticks_per_second = ticks_per_sec.QuadPart; |
405 } | 466 ATOMIC_THREAD_FENCE(memory_order_release); |
406 | 467 g_now_function = now_function; |
407 int64 GetQPCDriftMicroseconds() { | 468 g_system_trace_now_function = system_trace_now_function; |
408 if (!IsUsingHighResClock()) | |
409 return 0; | |
410 return abs((UnreliableNow() - ReliableNow()) - skew_); | |
411 } | |
412 | |
413 int64 QPCValueToMicroseconds(LONGLONG qpc_value) { | |
414 if (!ticks_per_second_) | |
415 return 0; | |
416 // If the QPC Value is below the overflow threshold, we proceed with | |
417 // simple multiply and divide. | |
418 if (qpc_value < Time::kQPCOverflowThreshold) | |
419 return qpc_value * Time::kMicrosecondsPerSecond / ticks_per_second_; | |
420 // Otherwise, calculate microseconds in a round about manner to avoid | |
421 // overflow and precision issues. | |
422 int64 whole_seconds = qpc_value / ticks_per_second_; | |
423 int64 leftover_ticks = qpc_value - (whole_seconds * ticks_per_second_); | |
424 int64 microseconds = (whole_seconds * Time::kMicrosecondsPerSecond) + | |
425 ((leftover_ticks * Time::kMicrosecondsPerSecond) / | |
426 ticks_per_second_); | |
427 return microseconds; | |
428 } | |
429 | |
430 private: | |
431 // Get the number of microseconds since boot in an unreliable fashion. | |
432 int64 UnreliableNow() { | |
433 LARGE_INTEGER now; | |
434 QueryPerformanceCounter(&now); | |
435 return QPCValueToMicroseconds(now.QuadPart); | |
436 } | |
437 | |
438 // Get the number of microseconds since boot in a reliable fashion. | |
439 int64 ReliableNow() { | |
440 return RolloverProtectedNow().InMicroseconds(); | |
441 } | |
442 | |
443 int64 ticks_per_second_; // 0 indicates QPF failed and we're broken. | |
444 int64 skew_; // Skew between lo-res and hi-res clocks (for debugging). | |
445 }; | |
446 | |
447 static base::LazyInstance<HighResNowSingleton>::Leaky | |
448 leaky_high_res_now_singleton = LAZY_INSTANCE_INITIALIZER; | |
449 | |
450 HighResNowSingleton* GetHighResNowSingleton() { | |
451 return leaky_high_res_now_singleton.Pointer(); | |
452 } | 469 } |
453 | 470 |
454 TimeDelta HighResNowWrapper() { | 471 TimeTicks InitialNowFunction() { |
455 return GetHighResNowSingleton()->Now(); | 472 InitializeNowFunctionPointers(); |
| 473 return g_now_function(); |
456 } | 474 } |
457 | 475 |
458 typedef TimeDelta (*NowFunction)(void); | 476 TimeTicks InitialSystemTraceNowFunction() { |
459 | 477 InitializeNowFunctionPointers(); |
460 bool CPUReliablySupportsHighResTime() { | 478 return g_system_trace_now_function(); |
461 base::CPU cpu; | |
462 if (!cpu.has_non_stop_time_stamp_counter() || | |
463 !GetHighResNowSingleton()->IsUsingHighResClock()) | |
464 return false; | |
465 | |
466 if (IsBuggyAthlon(cpu)) | |
467 return false; | |
468 | |
469 return true; | |
470 } | |
471 | |
472 TimeDelta InitialNowFunction(); | |
473 | |
474 volatile NowFunction now_function = InitialNowFunction; | |
475 | |
476 TimeDelta InitialNowFunction() { | |
477 if (!CPUReliablySupportsHighResTime()) { | |
478 InterlockedExchangePointer( | |
479 reinterpret_cast<void* volatile*>(&now_function), | |
480 &RolloverProtectedNow); | |
481 return RolloverProtectedNow(); | |
482 } | |
483 InterlockedExchangePointer( | |
484 reinterpret_cast<void* volatile*>(&now_function), | |
485 &HighResNowWrapper); | |
486 return HighResNowWrapper(); | |
487 } | 479 } |
488 | 480 |
489 } // namespace | 481 } // namespace |
490 | 482 |
491 // static | 483 // static |
492 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( | 484 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( |
493 TickFunctionType ticker) { | 485 TickFunctionType ticker) { |
494 base::AutoLock locked(rollover_lock); | 486 base::AutoLock locked(g_rollover_lock); |
495 TickFunctionType old = tick_function; | 487 TickFunctionType old = g_tick_function; |
496 tick_function = ticker; | 488 g_tick_function = ticker; |
497 rollover_ms = 0; | 489 g_rollover_ms = 0; |
498 last_seen_now = 0; | 490 g_last_seen_now = 0; |
499 return old; | 491 return old; |
500 } | 492 } |
501 | 493 |
502 // static | 494 // static |
503 TimeTicks TimeTicks::Now() { | 495 TimeTicks TimeTicks::Now() { |
504 return TimeTicks() + now_function(); | 496 return g_now_function(); |
505 } | 497 } |
506 | 498 |
507 // static | 499 // static |
508 TimeTicks TimeTicks::HighResNow() { | 500 bool TimeTicks::IsHighResolution() { |
509 return TimeTicks() + HighResNowWrapper(); | 501 if (g_now_function == &InitialNowFunction) |
| 502 InitializeNowFunctionPointers(); |
| 503 return g_now_function == &QPCNow; |
510 } | 504 } |
511 | 505 |
512 // static | 506 // static |
513 bool TimeTicks::IsHighResNowFastAndReliable() { | |
514 return CPUReliablySupportsHighResTime(); | |
515 } | |
516 | |
517 // static | |
518 TimeTicks TimeTicks::ThreadNow() { | 507 TimeTicks TimeTicks::ThreadNow() { |
519 NOTREACHED(); | 508 NOTREACHED(); |
520 return TimeTicks(); | 509 return TimeTicks(); |
521 } | 510 } |
522 | 511 |
523 // static | 512 // static |
524 TimeTicks TimeTicks::NowFromSystemTraceTime() { | 513 TimeTicks TimeTicks::NowFromSystemTraceTime() { |
525 return HighResNow(); | 514 return g_system_trace_now_function(); |
526 } | |
527 | |
528 // static | |
529 int64 TimeTicks::GetQPCDriftMicroseconds() { | |
530 return GetHighResNowSingleton()->GetQPCDriftMicroseconds(); | |
531 } | 515 } |
532 | 516 |
533 // static | 517 // static |
534 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { | 518 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { |
535 return TimeTicks(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 519 return TimeTicks() + QPCValueToTimeDelta(qpc_value); |
536 } | |
537 | |
538 // static | |
539 bool TimeTicks::IsHighResClockWorking() { | |
540 return GetHighResNowSingleton()->IsUsingHighResClock(); | |
541 } | 520 } |
542 | 521 |
543 // TimeDelta ------------------------------------------------------------------ | 522 // TimeDelta ------------------------------------------------------------------ |
544 | 523 |
545 // static | 524 // static |
546 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { | 525 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { |
547 return TimeDelta(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); | 526 return QPCValueToTimeDelta(qpc_value); |
548 } | 527 } |
OLD | NEW |