Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(380)

Side by Side Diff: base/time/time_win.cc

Issue 797893003: [Windows] One TimeTicks clock: efficient/reliable high-res, with low-res fallback. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed comments from PS1 and PS2. Threading fixes in InitializeNowFunctionPointers(). Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/time/time_unittest.cc ('k') | base/time/time_win_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 5
6 // Windows Timer Primer 6 // Windows Timer Primer
7 // 7 //
8 // A good article: http://www.ddj.com/windows/184416651 8 // A good article: http://www.ddj.com/windows/184416651
9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258 9 // A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258
10 // 10 //
(...skipping 19 matching lines...) Expand all
30 // To work around all this, we're going to generally use timeGetTime(). We 30 // To work around all this, we're going to generally use timeGetTime(). We
31 // will only increase the system-wide timer if we're not running on battery 31 // will only increase the system-wide timer if we're not running on battery
32 // power. 32 // power.
33 33
34 #include "base/time/time.h" 34 #include "base/time/time.h"
35 35
36 #pragma comment(lib, "winmm.lib") 36 #pragma comment(lib, "winmm.lib")
37 #include <windows.h> 37 #include <windows.h>
38 #include <mmsystem.h> 38 #include <mmsystem.h>
39 39
40 #include "base/atomicops.h"
40 #include "base/basictypes.h" 41 #include "base/basictypes.h"
41 #include "base/cpu.h" 42 #include "base/cpu.h"
42 #include "base/lazy_instance.h" 43 #include "base/lazy_instance.h"
43 #include "base/logging.h" 44 #include "base/logging.h"
44 #include "base/synchronization/lock.h" 45 #include "base/synchronization/lock.h"
45 46
46 using base::Time; 47 using base::Time;
47 using base::TimeDelta; 48 using base::TimeDelta;
48 using base::TimeTicks; 49 using base::TimeTicks;
49 50
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
300 // TimeTicks ------------------------------------------------------------------ 301 // TimeTicks ------------------------------------------------------------------
301 namespace { 302 namespace {
302 303
303 // We define a wrapper to adapt between the __stdcall and __cdecl call of the 304 // We define a wrapper to adapt between the __stdcall and __cdecl call of the
304 // mock function, and to avoid a static constructor. Assigning an import to a 305 // mock function, and to avoid a static constructor. Assigning an import to a
305 // function pointer directly would require setup code to fetch from the IAT. 306 // function pointer directly would require setup code to fetch from the IAT.
306 DWORD timeGetTimeWrapper() { 307 DWORD timeGetTimeWrapper() {
307 return timeGetTime(); 308 return timeGetTime();
308 } 309 }
309 310
310 DWORD (*tick_function)(void) = &timeGetTimeWrapper; 311 DWORD (*g_tick_function)(void) = &timeGetTimeWrapper;
311 312
312 // Accumulation of time lost due to rollover (in milliseconds). 313 // Accumulation of time lost due to rollover (in milliseconds).
313 int64 rollover_ms = 0; 314 int64 g_rollover_ms = 0;
314 315
315 // The last timeGetTime value we saw, to detect rollover. 316 // The last timeGetTime value we saw, to detect rollover.
316 DWORD last_seen_now = 0; 317 DWORD g_last_seen_now = 0;
317 318
318 // Lock protecting rollover_ms and last_seen_now. 319 // Lock protecting rollover_ms and last_seen_now.
319 // Note: this is a global object, and we usually avoid these. However, the time 320 // Note: this is a global object, and we usually avoid these. However, the time
320 // code is low-level, and we don't want to use Singletons here (it would be too 321 // code is low-level, and we don't want to use Singletons here (it would be too
321 // easy to use a Singleton without even knowing it, and that may lead to many 322 // easy to use a Singleton without even knowing it, and that may lead to many
322 // gotchas). Its impact on startup time should be negligible due to low-level 323 // gotchas). Its impact on startup time should be negligible due to low-level
323 // nature of time code. 324 // nature of time code.
324 base::Lock rollover_lock; 325 base::Lock g_rollover_lock;
325 326
326 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic 327 // We use timeGetTime() to implement TimeTicks::Now(). This can be problematic
327 // because it returns the number of milliseconds since Windows has started, 328 // because it returns the number of milliseconds since Windows has started,
328 // which will roll over the 32-bit value every ~49 days. We try to track 329 // which will roll over the 32-bit value every ~49 days. We try to track
329 // rollover ourselves, which works if TimeTicks::Now() is called at least every 330 // rollover ourselves, which works if TimeTicks::Now() is called at least every
330 // 49 days. 331 // 49 days.
331 TimeDelta RolloverProtectedNow() { 332 TimeTicks RolloverProtectedNow() {
332 base::AutoLock locked(rollover_lock); 333 base::AutoLock locked(g_rollover_lock);
333 // We should hold the lock while calling tick_function to make sure that 334 // We should hold the lock while calling tick_function to make sure that
334 // we keep last_seen_now correctly in sync. 335 // we keep last_seen_now correctly in sync.
335 DWORD now = tick_function(); 336 DWORD now = g_tick_function();
336 if (now < last_seen_now) 337 if (now < g_last_seen_now)
337 rollover_ms += 0x100000000I64; // ~49.7 days. 338 g_rollover_ms += 0x100000000I64; // ~49.7 days.
338 last_seen_now = now; 339 g_last_seen_now = now;
339 return TimeDelta::FromMilliseconds(now + rollover_ms); 340 return TimeTicks() + TimeDelta::FromMilliseconds(now + g_rollover_ms);
340 } 341 }
341 342
342 bool IsBuggyAthlon(const base::CPU& cpu) { 343 // Discussion of tick counter options on Windows:
343 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is 344 //
344 // unreliable. Fallback to low-res clock.
345 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15;
346 }
347
348 // Overview of time counters:
349 // (1) CPU cycle counter. (Retrieved via RDTSC) 345 // (1) CPU cycle counter. (Retrieved via RDTSC)
350 // The CPU counter provides the highest resolution time stamp and is the least 346 // The CPU counter provides the highest resolution time stamp and is the least
351 // expensive to retrieve. However, the CPU counter is unreliable and should not 347 // expensive to retrieve. However, the CPU counter is unreliable and should not
352 // be used in production. Its biggest issue is that it is per processor and it 348 // be used in production. Its biggest issue is that it is per processor and it
353 // is not synchronized between processors. Also, on some computers, the counters 349 // is not synchronized between processors. Also, on some computers, the counters
354 // will change frequency due to thermal and power changes, and stop in some 350 // will change frequency due to thermal and power changes, and stop in some
355 // states. 351 // states.
356 // 352 //
357 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high- 353 // (2) QueryPerformanceCounter (QPC). The QPC counter provides a high-
358 // resolution (100 nanoseconds) time stamp but is comparatively more expensive 354 // resolution (100 nanoseconds) time stamp but is comparatively more expensive
359 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL. 355 // to retrieve. What QueryPerformanceCounter actually does is up to the HAL.
360 // (with some help from ACPI). 356 // (with some help from ACPI).
361 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx 357 // According to http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx
362 // in the worst case, it gets the counter from the rollover interrupt on the 358 // in the worst case, it gets the counter from the rollover interrupt on the
363 // programmable interrupt timer. In best cases, the HAL may conclude that the 359 // programmable interrupt timer. In best cases, the HAL may conclude that the
364 // RDTSC counter runs at a constant frequency, then it uses that instead. On 360 // RDTSC counter runs at a constant frequency, then it uses that instead. On
365 // multiprocessor machines, it will try to verify the values returned from 361 // multiprocessor machines, it will try to verify the values returned from
366 // RDTSC on each processor are consistent with each other, and apply a handful 362 // RDTSC on each processor are consistent with each other, and apply a handful
cpu_(ooo_6.6-7.5) 2015/01/07 17:41:31 comment section 361 - 367 needs update?
miu 2015/01/07 19:20:18 Good idea. I'll update this discussion with all m
367 // of workarounds for known buggy hardware. In other words, QPC is supposed to 363 // of workarounds for known buggy hardware. In other words, QPC is supposed to
368 // give consistent result on a multiprocessor computer, but it is unreliable in 364 // give consistent result on a multiprocessor computer, but it is unreliable in
369 // reality due to bugs in BIOS or HAL on some, especially old computers. 365 // reality due to bugs in BIOS or HAL on some, especially old computers.
370 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but 366 // With recent updates on HAL and newer BIOS, QPC is getting more reliable but
371 // it should be used with caution. 367 // it should be used with caution.
372 // 368 //
373 // (3) System time. The system time provides a low-resolution (typically 10ms 369 // (3) System time. The system time provides a low-resolution (typically 10ms
374 // to 55 milliseconds) time stamp but is comparatively less expensive to 370 // to 55 milliseconds) time stamp but is comparatively less expensive to
cpu_(ooo_6.6-7.5) 2015/01/07 17:41:31 55 ?
miu 2015/01/07 19:20:17 Not sure where these numbers come from, but I'll u
375 // retrieve and more reliable. 371 // retrieve and more reliable.
376 class HighResNowSingleton {
377 public:
378 HighResNowSingleton()
379 : ticks_per_second_(0),
380 skew_(0) {
381 372
382 base::CPU cpu; 373 using NowFunction = TimeTicks (*)(void);
383 if (IsBuggyAthlon(cpu))
384 return;
385 374
386 // Synchronize the QPC clock with GetSystemTimeAsFileTime. 375 TimeTicks InitialNowFunction();
387 LARGE_INTEGER ticks_per_sec = {0}; 376 TimeTicks InitialSystemTraceNowFunction();
388 if (!QueryPerformanceFrequency(&ticks_per_sec))
389 return; // QPC is not available.
390 ticks_per_second_ = ticks_per_sec.QuadPart;
391 377
392 skew_ = UnreliableNow() - ReliableNow(); 378 // See "threading notes" in InitializeNowFunctionPointers() for details on how
379 // concurrent reads/writes to these globals have been made safe.
380 NowFunction g_now_function = &InitialNowFunction;
381 NowFunction g_system_trace_now_function = &InitialSystemTraceNowFunction;
382 int64 g_qpc_ticks_per_second = 0;
383
384 TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) {
brianderson 2015/01/07 19:53:11 @brucedawson makes a good point about needing to p
miu 2015/01/07 22:04:32 Done. I had considered this, but deemed it imposs
385 DCHECK_GT(g_qpc_ticks_per_second, 0);
386
387 // If the QPC Value is below the overflow threshold, we proceed with
388 // simple multiply and divide.
389 if (qpc_value < Time::kQPCOverflowThreshold) {
390 return TimeDelta::FromMicroseconds(
391 qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second);
392 }
393 // Otherwise, calculate microseconds in a roundabout manner to avoid
394 // overflow and precision issues.
395 int64 whole_seconds = qpc_value / g_qpc_ticks_per_second;
396 int64 leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second);
397 return TimeDelta::FromMicroseconds(
398 (whole_seconds * Time::kMicrosecondsPerSecond) +
399 ((leftover_ticks * Time::kMicrosecondsPerSecond) /
400 g_qpc_ticks_per_second));
401 }
402
403 TimeTicks QPCNow() {
404 LARGE_INTEGER now;
405 QueryPerformanceCounter(&now);
406 return TimeTicks() + QPCValueToTimeDelta(now.QuadPart);
407 }
408
409 bool IsBuggyAthlon(const base::CPU& cpu) {
410 // On Athlon X2 CPUs (e.g. model 15) QueryPerformanceCounter is unreliable.
411 return cpu.vendor_name() == "AuthenticAMD" && cpu.family() == 15;
412 }
413
414 void InitializeNowFunctionPointers() {
415 LARGE_INTEGER ticks_per_sec = {0};
416 if (!QueryPerformanceFrequency(&ticks_per_sec))
417 ticks_per_sec.QuadPart = 0;
418
419 // If Windows cannot provide a QPC implementation, both Now() and
420 // NowFromSystemTraceTime() must use the low-resolution clock.
421 //
422 // If the QPC implementation is expensive and/or unreliable, Now() will use
423 // the low-resolution clock, but NowFromSystemTraceTime() will use the QPC (in
424 // the hope that it is still useful for tracing purposes). A CPU lacking a
425 // non-stop time counter will cause Windows to provide an alternate QPC
426 // implementation that works, but is expensive to use. Certain Athlon CPUs are
427 // known to make the QPC implementation unreliable.
428 //
429 // Otherwise, both Now functions can use the high-resolution QPC clock. As of
430 // 4 January 2015, ~68% of users fall within this category.
431 NowFunction now_function;
432 NowFunction system_trace_now_function;
433 base::CPU cpu;
434 if (ticks_per_sec.QuadPart <= 0) {
435 now_function = system_trace_now_function = &RolloverProtectedNow;
436 } else if (!cpu.has_non_stop_time_stamp_counter() || IsBuggyAthlon(cpu)) {
437 now_function = &RolloverProtectedNow;
438 system_trace_now_function = &QPCNow;
439 } else {
440 now_function = system_trace_now_function = &QPCNow;
393 } 441 }
394 442
395 bool IsUsingHighResClock() { 443 // Threading note 1: In an unlikely race condition, it's possible for two or
396 return ticks_per_second_ != 0; 444 // more threads to enter InitializeNowFunctionPointers() in parallel. This is
397 } 445 // not a problem since all threads should end up writing out the same values
398 446 // to the global variables.
399 TimeDelta Now() { 447 //
cpu_(ooo_6.6-7.5) 2015/01/07 17:41:31 remind me again why we don't simply initialize thi
miu 2015/01/07 19:20:17 We could. I was just sticking with what we had.
400 if (IsUsingHighResClock()) 448 // Threading note 2: The store to |g_qpc_ticks_per_second| must be complete
401 return TimeDelta::FromMicroseconds(UnreliableNow()); 449 // and visible to other threads before the new value for |g_now_function|
402 450 // becomes visible to other threads. A memory barrier is used to guarantee
403 // Just fallback to the slower clock. 451 // this ordering.
404 return RolloverProtectedNow(); 452 g_qpc_ticks_per_second = ticks_per_sec.QuadPart;
405 } 453 base::subtle::MemoryBarrier();
406 454 g_now_function = now_function;
407 int64 GetQPCDriftMicroseconds() { 455 g_system_trace_now_function = system_trace_now_function;
408 if (!IsUsingHighResClock())
409 return 0;
410 return abs((UnreliableNow() - ReliableNow()) - skew_);
411 }
412
413 int64 QPCValueToMicroseconds(LONGLONG qpc_value) {
414 if (!ticks_per_second_)
415 return 0;
416 // If the QPC Value is below the overflow threshold, we proceed with
417 // simple multiply and divide.
418 if (qpc_value < Time::kQPCOverflowThreshold)
419 return qpc_value * Time::kMicrosecondsPerSecond / ticks_per_second_;
420 // Otherwise, calculate microseconds in a roundabout manner to avoid
421 // overflow and precision issues.
422 int64 whole_seconds = qpc_value / ticks_per_second_;
423 int64 leftover_ticks = qpc_value - (whole_seconds * ticks_per_second_);
424 int64 microseconds = (whole_seconds * Time::kMicrosecondsPerSecond) +
425 ((leftover_ticks * Time::kMicrosecondsPerSecond) /
426 ticks_per_second_);
427 return microseconds;
428 }
429
430 private:
431 // Get the number of microseconds since boot in an unreliable fashion.
432 int64 UnreliableNow() {
433 LARGE_INTEGER now;
434 QueryPerformanceCounter(&now);
435 return QPCValueToMicroseconds(now.QuadPart);
436 }
437
438 // Get the number of microseconds since boot in a reliable fashion.
439 int64 ReliableNow() {
440 return RolloverProtectedNow().InMicroseconds();
441 }
442
443 int64 ticks_per_second_; // 0 indicates QPF failed and we're broken.
444 int64 skew_; // Skew between lo-res and hi-res clocks (for debugging).
445 };
446
447 static base::LazyInstance<HighResNowSingleton>::Leaky
448 leaky_high_res_now_singleton = LAZY_INSTANCE_INITIALIZER;
449
450 HighResNowSingleton* GetHighResNowSingleton() {
451 return leaky_high_res_now_singleton.Pointer();
452 } 456 }
453 457
454 TimeDelta HighResNowWrapper() { 458 TimeTicks InitialNowFunction() {
455 return GetHighResNowSingleton()->Now(); 459 InitializeNowFunctionPointers();
460 return g_now_function();
456 } 461 }
457 462
458 typedef TimeDelta (*NowFunction)(void); 463 TimeTicks InitialSystemTraceNowFunction() {
459 464 InitializeNowFunctionPointers();
460 bool CPUReliablySupportsHighResTime() { 465 return g_system_trace_now_function();
461 base::CPU cpu;
462 if (!cpu.has_non_stop_time_stamp_counter() ||
463 !GetHighResNowSingleton()->IsUsingHighResClock())
464 return false;
465
466 if (IsBuggyAthlon(cpu))
467 return false;
468
469 return true;
470 }
471
472 TimeDelta InitialNowFunction();
473
474 volatile NowFunction now_function = InitialNowFunction;
475
476 TimeDelta InitialNowFunction() {
477 if (!CPUReliablySupportsHighResTime()) {
478 InterlockedExchangePointer(
479 reinterpret_cast<void* volatile*>(&now_function),
480 &RolloverProtectedNow);
481 return RolloverProtectedNow();
482 }
483 InterlockedExchangePointer(
484 reinterpret_cast<void* volatile*>(&now_function),
485 &HighResNowWrapper);
486 return HighResNowWrapper();
487 } 466 }
488 467
489 } // namespace 468 } // namespace
490 469
491 // static 470 // static
492 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction( 471 TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction(
493 TickFunctionType ticker) { 472 TickFunctionType ticker) {
494 base::AutoLock locked(rollover_lock); 473 base::AutoLock locked(g_rollover_lock);
495 TickFunctionType old = tick_function; 474 TickFunctionType old = g_tick_function;
496 tick_function = ticker; 475 g_tick_function = ticker;
497 rollover_ms = 0; 476 g_rollover_ms = 0;
498 last_seen_now = 0; 477 g_last_seen_now = 0;
499 return old; 478 return old;
500 } 479 }
501 480
502 // static 481 // static
503 TimeTicks TimeTicks::Now() { 482 TimeTicks TimeTicks::Now() {
504 return TimeTicks() + now_function(); 483 return g_now_function();
505 } 484 }
506 485
507 // static 486 // static
508 TimeTicks TimeTicks::HighResNow() { 487 bool TimeTicks::IsHighResolution() {
509 return TimeTicks() + HighResNowWrapper(); 488 if (g_now_function == &InitialNowFunction)
489 InitializeNowFunctionPointers();
490 return g_now_function == &QPCNow;
510 } 491 }
511 492
512 // static 493 // static
513 bool TimeTicks::IsHighResNowFastAndReliable() {
514 return CPUReliablySupportsHighResTime();
515 }
516
517 // static
518 TimeTicks TimeTicks::ThreadNow() { 494 TimeTicks TimeTicks::ThreadNow() {
519 NOTREACHED(); 495 NOTREACHED();
520 return TimeTicks(); 496 return TimeTicks();
521 } 497 }
522 498
523 // static 499 // static
524 TimeTicks TimeTicks::NowFromSystemTraceTime() { 500 TimeTicks TimeTicks::NowFromSystemTraceTime() {
525 return HighResNow(); 501 return g_system_trace_now_function();
526 }
527
528 // static
529 int64 TimeTicks::GetQPCDriftMicroseconds() {
530 return GetHighResNowSingleton()->GetQPCDriftMicroseconds();
531 } 502 }
532 503
533 // static 504 // static
534 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) { 505 TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) {
535 return TimeTicks(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); 506 return TimeTicks() + QPCValueToTimeDelta(qpc_value);
536 }
537
538 // static
539 bool TimeTicks::IsHighResClockWorking() {
540 return GetHighResNowSingleton()->IsUsingHighResClock();
541 } 507 }
542 508
543 // TimeDelta ------------------------------------------------------------------ 509 // TimeDelta ------------------------------------------------------------------
544 510
545 // static 511 // static
546 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) { 512 TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) {
547 return TimeDelta(GetHighResNowSingleton()->QPCValueToMicroseconds(qpc_value)); 513 return QPCValueToTimeDelta(qpc_value);
548 } 514 }
OLDNEW
« no previous file with comments | « base/time/time_unittest.cc ('k') | base/time/time_win_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698