OLD | NEW |
1 // Copyright (c) 2004, Google Inc. | 1 // Copyright (c) 2004, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 28 matching lines...) |
39 // | 39 // |
40 // Also, in some out of order CPU implementations, the CycleClock is not | 40 // Also, in some out of order CPU implementations, the CycleClock is not |
41 // serializing. So if you're trying to count at cycles granularity, your | 41 // serializing. So if you're trying to count at cycles granularity, your |
42 // data might be inaccurate due to out of order instruction execution. | 42 // data might be inaccurate due to out of order instruction execution. |
43 // ---------------------------------------------------------------------- | 43 // ---------------------------------------------------------------------- |
44 | 44 |
45 #ifndef GOOGLE_BASE_CYCLECLOCK_H_ | 45 #ifndef GOOGLE_BASE_CYCLECLOCK_H_ |
46 #define GOOGLE_BASE_CYCLECLOCK_H_ | 46 #define GOOGLE_BASE_CYCLECLOCK_H_ |
47 | 47 |
48 #include "base/basictypes.h" // make sure we get the def for int64 | 48 #include "base/basictypes.h" // make sure we get the def for int64 |
| 49 #include "base/arm_instruction_set_select.h" |
49 #if defined(__MACH__) && defined(__APPLE__) | 50 #if defined(__MACH__) && defined(__APPLE__) |
50 #include <mach/mach_time.h> | 51 # include <mach/mach_time.h> |
| 52 #endif |
| 53 // For MSVC, we want the __rdtsc intrinsic, declared in <intrin.h>. |
| 54 // Unfortunately, in some environments, <windows.h> and <intrin.h> have |
| 55 // conflicting declarations of some other intrinsics, breaking compilation. |
| 56 // Therefore, we simply declare __rdtsc ourselves. See also |
| 57 // http://connect.microsoft.com/VisualStudio/feedback/details/262047 |
| 58 #if defined(_MSC_VER) |
| 59 extern "C" uint64 __rdtsc(); |
| 60 #pragma intrinsic(__rdtsc) |
| 61 #endif |
| 62 #ifdef HAVE_SYS_TIME_H |
| 63 #include <sys/time.h> |
51 #endif | 64 #endif |
52 | 65 |
53 // NOTE: only i386 and x86_64 have been well tested. | 66 // NOTE: only i386 and x86_64 have been well tested. |
54 // PPC, sparc, alpha, and ia64 are based on | 67 // PPC, sparc, alpha, and ia64 are based on |
55 // http://peter.kuscsik.com/wordpress/?p=14 | 68 // http://peter.kuscsik.com/wordpress/?p=14 |
56 // with modifications by m3b. cf | 69 // with modifications by m3b. See also |
57 // https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h | 70 // https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h |
58 struct CycleClock { | 71 struct CycleClock { |
59 // This should return the number of cycles since power-on. Thread-safe. | 72 // This should return the number of cycles since power-on. Thread-safe. |
60 static inline int64 Now() { | 73 static inline int64 Now() { |
61 #if defined(__MACH__) && defined(__APPLE__) | 74 #if defined(__MACH__) && defined(__APPLE__) |
62 // this goes at the top because we need ALL Macs, regardless | 75 // this goes at the top because we need ALL Macs, regardless of |
63 // of architecture, to return the number of "mach time units" | 76 // architecture, to return the number of "mach time units" that |
64 // that have passes since startup. See sysinfo.cc where | 77 // have passed since startup. See sysinfo.cc where |
65 // InitializeSystemInfo() sets the supposed cpu clock frequency of macs | 78 // InitializeSystemInfo() sets the supposed cpu clock frequency of |
66 // to the number of mach time units per second, not actual | 79 // macs to the number of mach time units per second, not actual |
67 // CPU clock frequency (which can change in the face of CPU | 80 // CPU clock frequency (which can change in the face of CPU |
68 // frequency scaling). also note that when the Mac sleeps, | 81 // frequency scaling). Also note that when the Mac sleeps, this |
69 // this counter pauses; it does not continue counting, nor resets | 82 // counter pauses; it does not continue counting, nor does it |
70 // to zero. | 83 // reset to zero. |
71 return mach_absolute_time(); | 84 return mach_absolute_time(); |
72 #elif defined(__i386__) | 85 #elif defined(__i386__) |
73 int64 ret; | 86 int64 ret; |
74 __asm__ volatile ("rdtsc" | 87 __asm__ volatile ("rdtsc" : "=A" (ret) ); |
75 : "=A" (ret) ); | |
76 return ret; | 88 return ret; |
77 #elif defined(__x86_64__) || defined(__amd64__) | 89 #elif defined(__x86_64__) || defined(__amd64__) |
78 uint64 low, high; | 90 uint64 low, high; |
79 __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high)); | 91 __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high)); |
80 return (high << 32) | low; | 92 return (high << 32) | low; |
81 #elif defined(__powerpc__) || defined(__ppc__) | 93 #elif defined(__powerpc__) || defined(__ppc__) |
82 // This returns a time-base, which is not always precisely a cycle-count. | 94 // This returns a time-base, which is not always precisely a cycle-count. |
83 int64 tbl, tbu0, tbu1; | 95 int64 tbl, tbu0, tbu1; |
84 asm("mftbu %0" : "=r" (tbu0)); | 96 asm("mftbu %0" : "=r" (tbu0)); |
85 asm("mftb %0" : "=r" (tbl )); | 97 asm("mftb %0" : "=r" (tbl)); |
86 asm("mftbu %0" : "=r" (tbu1)); | 98 asm("mftbu %0" : "=r" (tbu1)); |
87 tbl &= -static_cast<int64>(tbu0 == tbu1); | 99 tbl &= -static_cast<int64>(tbu0 == tbu1); |
88 // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) | 100 // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) |
89 return (tbu1 << 32) | tbl; | 101 return (tbu1 << 32) | tbl; |
90 #elif defined(__sparc__) | 102 #elif defined(__sparc__) |
91 int64 tick; | 103 int64 tick; |
92 asm(".byte 0x83, 0x41, 0x00, 0x00"); | 104 asm(".byte 0x83, 0x41, 0x00, 0x00"); |
93 asm("mov %%g1, %0" : "=r" (tick)); | 105 asm("mov %%g1, %0" : "=r" (tick)); |
94 return tick; | 106 return tick; |
95 #elif defined(__ia64__) | 107 #elif defined(__ia64__) |
96 int64 itc; | 108 int64 itc; |
97 asm("mov %0 = ar.itc" : "=r" (itc)); | 109 asm("mov %0 = ar.itc" : "=r" (itc)); |
98 return itc; | 110 return itc; |
99 #elif defined(_MSC_VER) && defined(_M_IX86) | 111 #elif defined(_MSC_VER) |
100 _asm rdtsc | 112 return __rdtsc(); |
| 113 #elif defined(ARMV3) |
| 114 #if defined(ARMV6) // V6 is the earliest arch that has a standard cyclecount |
| 115 uint32 pmccntr; |
| 116 uint32 pmuseren; |
| 117 uint32 pmcntenset; |
| 118 // Read the user mode perf monitor counter access permissions. |
| 119 asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren)); |
| 120 if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. |
| 121 asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset)); |
| 122 if (pmcntenset & 0x80000000ul) { // Is it counting? |
| 123 asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr)); |
| 124 // The counter is set up to count every 64th cycle |
| 125 return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6 |
| 126 } |
| 127 } |
| 128 #endif |
| 129 struct timeval tv; |
| 130 gettimeofday(&tv, NULL); |
| 131 return static_cast<int64>(tv.tv_sec) * 1000000 + tv.tv_usec; |
101 #else | 132 #else |
102 // We could define __alpha here as well, but it only has a 32-bit | 133 // The soft failover to a generic implementation is automatic only for ARM. |
103 // timer (good for like 4 seconds), which isn't very useful. | 134 // For other platforms the developer is expected to make an attempt to create |
| 135 // a fast implementation and use the generic version if nothing better is available. |
104 #error You need to define CycleTimer for your O/S and CPU | 136 #error You need to define CycleTimer for your O/S and CPU |
105 #endif | 137 #endif |
106 } | 138 } |
107 }; | 139 }; |
108 | 140 |
109 | 141 |
110 #endif // GOOGLE_BASE_CYCLECLOCK_H_ | 142 #endif // GOOGLE_BASE_CYCLECLOCK_H_ |
OLD | NEW |