// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace internal {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 doesn't support SMP; there is no memory barrier instruction
//   at all on this architecture, nor when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced
//   by writing an arbitrary value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single-core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely unneeded on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single- or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e. on single-
//   core devices, this is an empty function that exits immediately.
//   On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper,
//   even when targeting ARMv5TE.
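//
// The helper at 0xffff0fa0 is the kernel's __kuser_memory_barrier entry
// point; see Documentation/arm/kernel_user_helpers.txt in the Linux
// kernel sources.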
//

inline void MemoryBarrier() {
#if defined(__linux__) || defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
#error MemoryBarrier() is not implemented on this platform.
#endif
}

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value != old_value)
    //     reloop = STREX(ptr, new_value)
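    //
    // Constraint notes: "=&r" marks early-clobber outputs (they may be
    // written before all inputs are consumed), "+m"(*ptr) tells the
    // compiler that the pointed-to word is both read and written, and
    // the "memory" clobber keeps it from caching *ptr across the asm.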
    __asm__ __volatile__(" ldrex %0, [%3]\n"
                         " mov %1, #0\n"
                         " cmp %0, %4\n"
#ifdef __thumb2__
                         " it eq\n"
#endif
                         " strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
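
// Usage sketch (illustrative only, not part of this header): a typical
// CAS retry loop that atomically stores the maximum of |*ptr| and
// |candidate|, relying on the return value being the observed old value:
//
//   Atomic32 prev = NoBarrier_Load(ptr);
//   while (candidate > prev) {
//     Atomic32 seen = NoBarrier_CompareAndSwap(ptr, prev, candidate);
//     if (seen == prev) break;  // The swap took place.
//     prev = seen;              // Another thread wrote first; retry.
//   }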

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   value = LDREX(ptr)
    //   value += increment
    //   reloop = STREX(ptr, value)
    //
    __asm__ __volatile__(" ldrex %0, [%3]\n"
                         " add %0, %0, %4\n"
                         " strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}
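
// Illustrative use (not part of this header): dropping a reference count,
// where the thread that performs the final decrement must observe every
// prior write to the object. Barrier_AtomicIncrement returns the new
// value, so a result of 0 identifies the last reference:
//
//   if (Barrier_AtomicIncrement(&object->ref_count, -1) == 0) {
//     delete object;  // All earlier writes to *object are visible here.
//   }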

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    //   old_value = LDREX(ptr)
    //   reloop = STREX(ptr, new_value)
    __asm__ __volatile__(" ldrex %0, [%3]\n"
                         " strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
    defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier; there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and non-zero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
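    // The observed value differs from |old_value|: the CAS fails and
    // reports what was actually there.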
    if (prev_value != old_value)
      return prev_value;
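    // The kernel helper returns 0 on success. A non-zero result means
    // *ptr changed between the read above and the cmpxchg, so retry.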
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use 3 barriers per successful CAS. To reduce this
  // overhead, use Acquire_CompareAndSwap(). Its implementation
  // guarantees that:
  //  - A successful swap uses only 2 barriers (in the kernel helper).
  //  - An early return due to (prev_value != old_value) performs
  //    a memory barrier with no store, which is equivalent to the
  //    generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
# error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed when |ptr| values are 32-bit aligned.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

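// Illustrative pairing (not part of this header): publishing data from
// one thread and consuming it in another with Release_Store/Acquire_Load,
// where |data|, |flag| and Use() are hypothetical:
//
//   // Producer:
//   data = 42;                  // Plain store.
//   Release_Store(&flag, 1);    // Barrier, then store: |data| is written
//                               // before |flag| becomes 1.
//   // Consumer:
//   if (Acquire_Load(&flag)) {  // Load, then barrier.
//     Use(data);                // Observes data == 42 if flag was 1.
//   }
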
// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

} }  // namespace v8::internal

#endif  // V8_ATOMICOPS_INTERNALS_ARM_GCC_H_