OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 14 matching lines...) |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 // This file is an internal atomic implementation, use atomicops.h instead. | 28 // This file is an internal atomic implementation, use atomicops.h instead. |
29 // | 29 // |
30 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. | 30 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. |
31 | 31 |
32 #ifndef V8_ATOMICOPS_INTERNALS_ARM_GCC_H_ | 32 #ifndef V8_ATOMICOPS_INTERNALS_ARM_GCC_H_ |
33 #define V8_ATOMICOPS_INTERNALS_ARM_GCC_H_ | 33 #define V8_ATOMICOPS_INTERNALS_ARM_GCC_H_ |
34 | 34 |
| 35 #if defined(__QNXNTO__) |
| 36 #include <sys/cpuinline.h> |
| 37 #endif |
| 38 |
35 namespace v8 { | 39 namespace v8 { |
36 namespace internal { | 40 namespace internal { |
37 | 41 |
38 // 0xffff0fc0 is the hard coded address of a function provided by | 42 // Memory barriers on ARM are funky, but the kernel is here to help: |
39 // the kernel which implements an atomic compare-exchange. On older | 43 // |
40 // ARM architecture revisions (pre-v6) this may be implemented using | 44 // * ARMv5 didn't support SMP; there is no memory barrier instruction |
41 // a syscall. This address is stable, and in active use (hard coded) | 45 // at all on this architecture, nor when targeting its machine code. |
42 // by at least glibc-2.7 and the Android C library. | 46 // |
43 typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, | 47 // * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by |
44 Atomic32 new_value, | 48 // writing a random value to a very specific coprocessor register. |
45 volatile Atomic32* ptr); | 49 // |
46 LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) = | 50 // * On ARMv7, the "dmb" instruction is used to perform a full memory |
47 (LinuxKernelCmpxchgFunc) 0xffff0fc0; | 51 // barrier (though writing to the co-processor will still work). |
| 52 // However, on single core devices (e.g. Nexus One, or Nexus S), |
| 53 // this instruction will take up to 200 ns, which is huge, even though |
| 54 // it's completely unneeded on these devices. |
| 55 // |
| 56 // * There is no easy way to determine at runtime whether the device is |
| 57 // single- or multi-core. However, the kernel provides a useful helper |
| 58 // function at a fixed memory address (0xffff0fa0), which will always |
| 59 // perform a memory barrier in the most efficient way. I.e., on |
| 60 // single-core devices, this is an empty function that exits immediately. |
| 61 // On multi-core devices, it implements a full memory barrier. |
| 62 // |
| 63 // * This source could be compiled to ARMv5 machine code that runs on a |
| 64 // multi-core ARMv6 or ARMv7 device. In this case, memory barriers |
| 65 // are needed for correct execution. Always call the kernel helper, even |
| 66 // when targeting ARMv5TE. |
| 67 // |
48 | 68 |
49 typedef void (*LinuxKernelMemoryBarrierFunc)(void); | 69 inline void MemoryBarrier() { |
50 LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) = | 70 #if defined(__linux__) || defined(__ANDROID__) |
51 (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; | 71 // Note: This is a function call, which is also an implicit compiler barrier. |
| 72 typedef void (*KernelMemoryBarrierFunc)(); |
| 73 ((KernelMemoryBarrierFunc)0xffff0fa0)(); |
| 74 #elif defined(__QNXNTO__) |
| 75 __cpu_membarrier(); |
| 76 #else |
| 77 #error MemoryBarrier() is not implemented on this platform. |
| 78 #endif |
| 79 } |
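| |
| // For reference, a hedged sketch (illustration only, not part of this |
| // patch) of the ARMv6 coprocessor write mentioned above. On ARMv6, a |
| // write to CP15 c7, c10, 5 performs a full data memory barrier: |
| // |
| //   __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" |
| //                        : : "r" (0) : "memory"); |
| // |
| // The kernel helper remains preferable, since it compiles down to an |
| // immediate return on single-core devices. |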
52 | 80 |
| 81 // An ARM toolchain will define only one of these macros, depending on |
| 82 // which variant of the target architecture is in use. This tests against |
| 83 // any known ARMv6 or ARMv7 variant, where it is possible to directly |
| 84 // use ldrex/strex instructions to implement fast atomic operations. |
| 85 #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ |
| 86 defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \ |
| 87 defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ |
| 88 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ |
| 89 defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6KZ__) || \ |
| 90 defined(__ARM_ARCH_6T2__) |
53 | 90 |
54 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | 91 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, |
55 Atomic32 old_value, | 92 Atomic32 old_value, |
56 Atomic32 new_value) { | 93 Atomic32 new_value) { |
57 Atomic32 prev_value = *ptr; | 94 Atomic32 prev_value; |
| 95 int reloop; |
58 do { | 96 do { |
59 if (!pLinuxKernelCmpxchg(old_value, new_value, | 97 // The following is equivalent to: |
60 const_cast<Atomic32*>(ptr))) { | 98 // |
| 99 // prev_value = LDREX(ptr) |
| 100 // reloop = 0 |
| 101 // if (prev_value != old_value) |
| 102 // reloop = STREX(ptr, new_value) |
| 103 __asm__ __volatile__(" ldrex %0, [%3]\n" |
| 104 " mov %1, #0\n" |
| 105 " cmp %0, %4\n" |
| 106 #ifdef __thumb2__ |
| 107 " it eq\n" |
| 108 #endif |
| 109 " strexeq %1, %5, [%3]\n" |
| 110 : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr) |
| 111 : "r"(ptr), "r"(old_value), "r"(new_value) |
| 112 : "cc", "memory"); |
| 113 } while (reloop != 0); |
| 114 return prev_value; |
| 115 } |
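| |
| // Usage sketch (illustration only, not part of this patch): callers build |
| // richer atomics on top of the CAS primitive with a retry loop. For |
| // example, a hypothetical atomic maximum (AtomicMax is not a V8 API): |
| // |
| //   Atomic32 AtomicMax(volatile Atomic32* ptr, Atomic32 value) { |
| //     for (;;) { |
| //       Atomic32 old_value = NoBarrier_Load(ptr); |
| //       if (old_value >= value) return old_value; |
| //       // The swap succeeded iff the returned value matches old_value. |
| //       if (NoBarrier_CompareAndSwap(ptr, old_value, value) == old_value) |
| //         return value; |
| //     } |
| //   } |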
| 116 |
| 117 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, |
| 118 Atomic32 old_value, |
| 119 Atomic32 new_value) { |
| 120 Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 121 MemoryBarrier(); |
| 122 return result; |
| 123 } |
| 124 |
| 125 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, |
| 126 Atomic32 old_value, |
| 127 Atomic32 new_value) { |
| 128 MemoryBarrier(); |
| 129 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); |
| 130 } |
| 131 |
| 132 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, |
| 133 Atomic32 increment) { |
| 134 Atomic32 value; |
| 135 int reloop; |
| 136 do { |
| 137 // Equivalent to: |
| 138 // |
| 139 // value = LDREX(ptr) |
| 140 // value += increment |
| 141 // reloop = STREX(ptr, value) |
| 142 // |
| 143 __asm__ __volatile__(" ldrex %0, [%3]\n" |
| 144 " add %0, %0, %4\n" |
| 145 " strex %1, %0, [%3]\n" |
| 146 : "=&r"(value), "=&r"(reloop), "+m"(*ptr) |
| 147 : "r"(ptr), "r"(increment) |
| 148 : "cc", "memory"); |
| 149 } while (reloop); |
| 150 return value; |
| 151 } |
| 152 |
| 153 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, |
| 154 Atomic32 increment) { |
| 155 // TODO(digit): Investigate if it's possible to implement this with |
| 156 // a single MemoryBarrier() operation between the LDREX and STREX. |
| 157 // See http://crbug.com/246514 |
| 158 MemoryBarrier(); |
| 159 Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment); |
| 160 MemoryBarrier(); |
| 161 return result; |
| 162 } |
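| |
| // Usage sketch (illustration only, not part of this patch): the barrier |
| // variant is the classic building block for thread-safe reference |
| // counting, where dropping the last reference must observe all prior |
| // writes to the object (|ref_count| is a hypothetical Atomic32 field): |
| // |
| //   if (Barrier_AtomicIncrement(&object->ref_count, -1) == 0) |
| //     delete object;  // Last reference was just released. |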
| 163 |
| 164 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, |
| 165 Atomic32 new_value) { |
| 166 Atomic32 old_value; |
| 167 int reloop; |
| 168 do { |
| 169 // old_value = LDREX(ptr) |
| 170 // reloop = STREX(ptr, new_value) |
| 171 __asm__ __volatile__(" ldrex %0, [%3]\n" |
| 172 " strex %1, %4, [%3]\n" |
| 173 : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr) |
| 174 : "r"(ptr), "r"(new_value) |
| 175 : "cc", "memory"); |
| 176 } while (reloop != 0); |
| 177 return old_value; |
| 178 } |
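| |
| // Usage sketch (illustration only, not part of this patch): the exchange |
| // and barrier primitives compose into a minimal spinlock. SpinLock and |
| // SpinUnlock are hypothetical names, not V8 APIs: |
| // |
| //   void SpinLock(volatile Atomic32* lock) { |
| //     while (NoBarrier_AtomicExchange(lock, 1) != 0) {}  // 0 => acquired. |
| //     MemoryBarrier();  // Acquire: critical section stays after the lock. |
| //   } |
| //   void SpinUnlock(volatile Atomic32* lock) { |
| //     MemoryBarrier();  // Release: critical section stays before unlock. |
| //     NoBarrier_Store(lock, 0); |
| //   } |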
| 179 |
| 180 // This tests against any known ARMv5 variant. |
| 181 #elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \ |
| 182 defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) |
| 183 |
| 184 // The kernel also provides a helper function to perform an atomic |
| 185 // compare-and-swap operation at the hard-wired address 0xffff0fc0. |
| 186 // On ARMv5, this is implemented by a special code path that the kernel |
| 187 // recognizes and restarts if the thread is pre-empted mid-sequence. |
| 188 // On ARMv6 and higher, it uses LDREX/STREX instructions instead. |
| 189 // |
| 190 // Note that this always performs a full memory barrier; there is no |
| 191 // need to add calls to MemoryBarrier() before or after it. It also |
| 192 // returns 0 on success, and 1 on failure. |
| 193 // |
| 194 // Available and reliable since Linux 2.6.24. Both Android and ChromeOS |
| 195 // use newer kernel revisions, so this should not be a concern. |
| 196 namespace { |
| 197 |
| 198 inline int LinuxKernelCmpxchg(Atomic32 old_value, |
| 199 Atomic32 new_value, |
| 200 volatile Atomic32* ptr) { |
| 201 typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*); |
| 202 return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr); |
| 203 } |
| 204 |
| 205 } // namespace |
| 206 |
| 207 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, |
| 208 Atomic32 old_value, |
| 209 Atomic32 new_value) { |
| 210 Atomic32 prev_value; |
| 211 for (;;) { |
| 212 prev_value = *ptr; |
| 213 if (prev_value != old_value) |
| 214 return prev_value; |
| 215 if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) |
61 return old_value; | 216 return old_value; |
62 } | 217 } |
63 prev_value = *ptr; | |
64 } while (prev_value == old_value); | |
65 return prev_value; | |
66 } | 218 } |
67 | 219 |
68 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | 220 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, |
69 Atomic32 new_value) { | 221 Atomic32 new_value) { |
70 Atomic32 old_value; | 222 Atomic32 old_value; |
71 do { | 223 do { |
72 old_value = *ptr; | 224 old_value = *ptr; |
73 } while (pLinuxKernelCmpxchg(old_value, new_value, | 225 } while (LinuxKernelCmpxchg(old_value, new_value, ptr)); |
74 const_cast<Atomic32*>(ptr))); | |
75 return old_value; | 226 return old_value; |
76 } | 227 } |
77 | 228 |
78 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | 229 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, |
79 Atomic32 increment) { | 230 Atomic32 increment) { |
80 return Barrier_AtomicIncrement(ptr, increment); | 231 return Barrier_AtomicIncrement(ptr, increment); |
81 } | 232 } |
82 | 233 |
83 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | 234 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, |
84 Atomic32 increment) { | 235 Atomic32 increment) { |
85 for (;;) { | 236 for (;;) { |
86 // Atomic exchange the old value with an incremented one. | 237 // Atomic exchange the old value with an incremented one. |
87 Atomic32 old_value = *ptr; | 238 Atomic32 old_value = *ptr; |
88 Atomic32 new_value = old_value + increment; | 239 Atomic32 new_value = old_value + increment; |
89 if (pLinuxKernelCmpxchg(old_value, new_value, | 240 if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) { |
90 const_cast<Atomic32*>(ptr)) == 0) { | |
91 // The exchange took place as expected. | 241 // The exchange took place as expected. |
92 return new_value; | 242 return new_value; |
93 } | 243 } |
94 // Otherwise, *ptr changed mid-loop and we need to retry. | 244 // Otherwise, *ptr changed mid-loop and we need to retry. |
95 } | 245 } |
96 } | 246 } |
97 | 247 |
98 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | 248 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, |
99 Atomic32 old_value, | 249 Atomic32 old_value, |
100 Atomic32 new_value) { | 250 Atomic32 new_value) { |
101 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | 251 Atomic32 prev_value; |
| 252 for (;;) { |
| 253 prev_value = *ptr; |
| 254 if (prev_value != old_value) { |
| 255 // Always ensure acquire semantics. |
| 256 MemoryBarrier(); |
| 257 return prev_value; |
| 258 } |
| 259 if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) |
| 260 return old_value; |
| 261 } |
102 } | 262 } |
103 | 263 |
104 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | 264 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, |
105 Atomic32 old_value, | 265 Atomic32 old_value, |
106 Atomic32 new_value) { | 266 Atomic32 new_value) { |
107 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | 267 // This could be implemented as: |
| 268 // MemoryBarrier(); |
| 269 // return NoBarrier_CompareAndSwap(); |
| 270 // |
| 271 // But this would use 3 barriers per successful CAS. To improve |
| 272 // performance, use Acquire_CompareAndSwap(), which guarantees that: |
| 273 // - A successful swap uses only 2 barriers (in the kernel helper). |
| 274 // - An early return due to (prev_value != old_value) performs |
| 275 // a memory barrier with no store, which is equivalent to the |
| 276 // generic implementation above. |
| 277 return Acquire_CompareAndSwap(ptr, old_value, new_value); |
108 } | 278 } |
109 | 279 |
| 280 #else |
| 281 # error "Your CPU's ARM architecture is not supported yet" |
| 282 #endif |
| 283 |
| 284 // NOTE: Atomicity of the following load and store operations is only |
| 285 // guaranteed when |ptr| values are 32-bit aligned. |
| 286 |
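| // A hedged note (illustration only, not part of this patch): Atomic32 |
| // variables are naturally 32-bit aligned by the ARM ABIs, but alignment |
| // must be restored explicitly inside packed structures, e.g. with GCC: |
| // |
| //   struct Shared { |
| //     char tag; |
| //     Atomic32 counter __attribute__((aligned(4)));  // Keep CAS-safe. |
| //   } __attribute__((packed)); |
| |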
110 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { | 287 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { |
111 *ptr = value; | 288 *ptr = value; |
112 } | 289 } |
113 | 290 |
114 inline void MemoryBarrier() { | |
115 pLinuxKernelMemoryBarrier(); | |
116 } | |
117 | |
118 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | 291 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { |
119 *ptr = value; | 292 *ptr = value; |
120 MemoryBarrier(); | 293 MemoryBarrier(); |
121 } | 294 } |
122 | 295 |
123 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { | 296 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { |
124 MemoryBarrier(); | 297 MemoryBarrier(); |
125 *ptr = value; | 298 *ptr = value; |
126 } | 299 } |
127 | 300 |
128 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { | 301 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; } |
129 return *ptr; | |
130 } | |
131 | 302 |
132 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { | 303 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { |
133 Atomic32 value = *ptr; | 304 Atomic32 value = *ptr; |
134 MemoryBarrier(); | 305 MemoryBarrier(); |
135 return value; | 306 return value; |
136 } | 307 } |
137 | 308 |
138 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { | 309 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { |
139 MemoryBarrier(); | 310 MemoryBarrier(); |
140 return *ptr; | 311 return *ptr; |
141 } | 312 } |
142 | 313 |
143 } } // namespace v8::internal | 314 } } // namespace v8::internal |
144 | 315 |
145 #endif // V8_ATOMICOPS_INTERNALS_ARM_GCC_H_ | 316 #endif // V8_ATOMICOPS_INTERNALS_ARM_GCC_H_ |