OLD | NEW |
| (Empty) |
1 /* Copyright (c) 2006, Google Inc. | |
2 * All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 * | |
30 * --- | |
31 * Author: Sanjay Ghemawat | |
32 */ | |
33 | |
34 // Implementation of atomic operations for x86. This file should not | |
35 // be included directly. Clients should instead include | |
36 // "base/atomicops.h". | |
37 | |
38 #ifndef BASE_ATOMICOPS_INTERNALS_X86_H_ | |
39 #define BASE_ATOMICOPS_INTERNALS_X86_H_ | |
40 | |
41 typedef int32_t Atomic32; | |
42 #define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* | |
43 | |
44 | |
45 // NOTE(vchen): x86 does not need to define AtomicWordCastType, because it | |
46 // already matches Atomic32 or Atomic64, depending on the platform. | |
47 | |
48 | |
49 // This struct is not part of the public API of this module; clients may not | |
50 // use it. | |
51 // Features of this x86 CPU. Values may not be correct before main() is run, | |
52 // but are set conservatively. | |
53 struct AtomicOps_x86CPUFeatureStruct { | |
54 bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence | |
55 // after acquire compare-and-swap. | |
56 bool has_sse2; // Processor has SSE2. | |
57 bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction. | |
58 }; | |
59 extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures; | |
60 | |
61 | |
62 #define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") | |
63 | |
64 | |
65 namespace base { | |
66 namespace subtle { | |
67 | |
68 typedef int64_t Atomic64; | |
69 | |
70 // 32-bit low-level operations on any platform. | |
71 | |
72 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | |
73 Atomic32 old_value, | |
74 Atomic32 new_value) { | |
75 Atomic32 prev; | |
76 __asm__ __volatile__("lock; cmpxchgl %1,%2" | |
77 : "=a" (prev) | |
78 : "q" (new_value), "m" (*ptr), "0" (old_value) | |
79 : "memory"); | |
80 return prev; | |
81 } | |
82 | |
83 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | |
84 Atomic32 new_value) { | |
85 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. | |
86 : "=r" (new_value) | |
87 : "m" (*ptr), "0" (new_value) | |
88 : "memory"); | |
89 return new_value; // Now it's the previous value. | |
90 } | |
91 | |
92 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | |
93 Atomic32 increment) { | |
94 Atomic32 temp = increment; | |
95 __asm__ __volatile__("lock; xaddl %0,%1" | |
96 : "+r" (temp), "+m" (*ptr) | |
97 : : "memory"); | |
98 // temp now holds the old value of *ptr | |
99 return temp + increment; | |
100 } | |
101 | |
102 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | |
103 Atomic32 increment) { | |
104 Atomic32 temp = increment; | |
105 __asm__ __volatile__("lock; xaddl %0,%1" | |
106 : "+r" (temp), "+m" (*ptr) | |
107 : : "memory"); | |
108 // temp now holds the old value of *ptr | |
109 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
110 __asm__ __volatile__("lfence" : : : "memory"); | |
111 } | |
112 return temp + increment; | |
113 } | |
114 | |
115 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | |
116 Atomic32 old_value, | |
117 Atomic32 new_value) { | |
118 Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
119 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
120 __asm__ __volatile__("lfence" : : : "memory"); | |
121 } | |
122 return x; | |
123 } | |
124 | |
125 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | |
126 Atomic32 old_value, | |
127 Atomic32 new_value) { | |
128 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
129 } | |
130 | |
131 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { | |
132 *ptr = value; | |
133 } | |
134 | |
135 #if defined(__x86_64__) | |
136 | |
137 // 64-bit implementations of memory barrier can be simpler, because | |
138 // "mfence" is guaranteed to exist. | |
139 inline void MemoryBarrier() { | |
140 __asm__ __volatile__("mfence" : : : "memory"); | |
141 } | |
142 | |
143 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | |
144 *ptr = value; | |
145 MemoryBarrier(); | |
146 } | |
147 | |
148 #else | |
149 | |
150 inline void MemoryBarrier() { | |
151 if (AtomicOps_Internalx86CPUFeatures.has_sse2) { | |
152 __asm__ __volatile__("mfence" : : : "memory"); | |
153 } else { // mfence is faster but not present on PIII | |
154 Atomic32 x = 0; | |
155 NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII | |
156 } | |
157 } | |
158 | |
159 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | |
160 if (AtomicOps_Internalx86CPUFeatures.has_sse2) { | |
161 *ptr = value; | |
162 __asm__ __volatile__("mfence" : : : "memory"); | |
163 } else { | |
164 NoBarrier_AtomicExchange(ptr, value); | |
165 // acts as a barrier on PIII | |
166 } | |
167 } | |
168 #endif | |
169 | |
170 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { | |
171 ATOMICOPS_COMPILER_BARRIER(); | |
172 *ptr = value; // An x86 store acts as a release barrier. | |
173 // See comments in Atomic64 version of Release_Store(), below. | |
174 } | |
175 | |
176 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { | |
177 return *ptr; | |
178 } | |
179 | |
180 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { | |
181 Atomic32 value = *ptr; // An x86 load acts as an acquire barrier. | |
182 // See comments in Atomic64 version of Release_Store(), below. | |
183 ATOMICOPS_COMPILER_BARRIER(); | |
184 return value; | |
185 } | |
186 | |
187 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { | |
188 MemoryBarrier(); | |
189 return *ptr; | |
190 } | |
191 | |
192 #if defined(__x86_64__) | |
193 | |
194 // 64-bit low-level operations on 64-bit platform. | |
195 | |
196 inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, | |
197 Atomic64 old_value, | |
198 Atomic64 new_value) { | |
199 Atomic64 prev; | |
200 __asm__ __volatile__("lock; cmpxchgq %1,%2" | |
201 : "=a" (prev) | |
202 : "q" (new_value), "m" (*ptr), "0" (old_value) | |
203 : "memory"); | |
204 return prev; | |
205 } | |
206 | |
207 inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, | |
208 Atomic64 new_value) { | |
209 __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg. | |
210 : "=r" (new_value) | |
211 : "m" (*ptr), "0" (new_value) | |
212 : "memory"); | |
213 return new_value; // Now it's the previous value. | |
214 } | |
215 | |
216 inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, | |
217 Atomic64 increment) { | |
218 Atomic64 temp = increment; | |
219 __asm__ __volatile__("lock; xaddq %0,%1" | |
220 : "+r" (temp), "+m" (*ptr) | |
221 : : "memory"); | |
222 // temp now contains the previous value of *ptr | |
223 return temp + increment; | |
224 } | |
225 | |
226 inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, | |
227 Atomic64 increment) { | |
228 Atomic64 temp = increment; | |
229 __asm__ __volatile__("lock; xaddq %0,%1" | |
230 : "+r" (temp), "+m" (*ptr) | |
231 : : "memory"); | |
232 // temp now contains the previous value of *ptr | |
233 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
234 __asm__ __volatile__("lfence" : : : "memory"); | |
235 } | |
236 return temp + increment; | |
237 } | |
238 | |
239 inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { | |
240 *ptr = value; | |
241 } | |
242 | |
243 inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { | |
244 *ptr = value; | |
245 MemoryBarrier(); | |
246 } | |
247 | |
248 inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { | |
249 ATOMICOPS_COMPILER_BARRIER(); | |
250 | |
251 *ptr = value; // An x86 store acts as a release barrier | |
252 // for current AMD/Intel chips as of Jan 2008. | |
253 // See also Acquire_Load(), below. | |
254 | |
255 // When new chips come out, check: | |
256 // IA-32 Intel Architecture Software Developer's Manual, Volume 3: | |
257 // System Programming Guide, Chapter 7: Multiple-Processor Management, | |
258 // Section 7.2, Memory Ordering. | |
259 // Last seen at: | |
260 // http://developer.intel.com/design/pentium4/manuals/index_new.htm | |
261 // | |
262 // x86 stores/loads fail to act as barriers for a few instructions (clflush | |
263 // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are | |
264 // not generated by the compiler, and are rare. Users of these instructions | |
265 // need to know about cache behaviour in any case since all of these involve | |
266 // either flushing cache lines or non-temporal cache hints. | |
267 } | |
268 | |
269 inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { | |
270 return *ptr; | |
271 } | |
272 | |
273 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { | |
274 Atomic64 value = *ptr; // An x86 load acts as an acquire barrier, | |
275 // for current AMD/Intel chips as of Jan 2008. | |
276 // See also Release_Store(), above. | |
277 ATOMICOPS_COMPILER_BARRIER(); | |
278 return value; | |
279 } | |
280 | |
281 inline Atomic64 Release_Load(volatile const Atomic64* ptr) { | |
282 MemoryBarrier(); | |
283 return *ptr; | |
284 } | |
285 | |
286 #else // defined(__x86_64__) | |
287 | |
288 // 64-bit low-level operations on 32-bit platform. | |
289 | |
290 #if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) | |
291 // For compilers older than gcc 4.1, we use inline asm. | |
292 // | |
293 // Potential pitfalls: | |
294 // | |
295 // 1. %ebx points to the Global Offset Table (GOT) with -fPIC. | |
296 // We need to preserve this register. | |
297 // 2. When explicit registers are used in inline asm, the | |
298 // compiler may not be aware of it and might try to reuse | |
299 // the same register for another argument which has constraints | |
300 // that allow it ("r" for example). | |
301 | |
302 inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, | |
303 Atomic64 old_value, | |
304 Atomic64 new_value) { | |
305 Atomic64 prev; | |
306 __asm__ __volatile__("push %%ebx\n\t" | |
307 "movl (%3), %%ebx\n\t" // Move 64-bit new_value into | |
308 "movl 4(%3), %%ecx\n\t" // ecx:ebx | |
309 "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same | |
310 "pop %%ebx\n\t" | |
311 : "=A" (prev) // as contents of ptr: | |
312 : "D" (ptr), // ecx:ebx => ptr | |
313 "0" (old_value), // else: | |
314 "S" (&new_value) // old *ptr => edx:eax | |
315 : "memory", "%ecx"); | |
316 return prev; | |
317 } | |
318 #endif // Compiler < gcc-4.1 | |
319 | |
320 inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, | |
321 Atomic64 old_val, | |
322 Atomic64 new_val) { | |
323 return __sync_val_compare_and_swap(ptr, old_val, new_val); | |
324 } | |
325 | |
326 inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, | |
327 Atomic64 new_val) { | |
328 Atomic64 old_val; | |
329 | |
330 do { | |
331 old_val = *ptr; | |
332 } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); | |
333 | |
334 return old_val; | |
335 } | |
336 | |
337 inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, | |
338 Atomic64 increment) { | |
339 Atomic64 old_val, new_val; | |
340 | |
341 do { | |
342 old_val = *ptr; | |
343 new_val = old_val + increment; | |
344 } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); | |
345 | |
346 return old_val + increment; | |
347 } | |
348 | |
349 inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, | |
350 Atomic64 increment) { | |
351 Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment); | |
352 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
353 __asm__ __volatile__("lfence" : : : "memory"); | |
354 } | |
355 return new_val; | |
356 } | |
357 | |
358 inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { | |
359 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic | |
360 "movq %%mm0, %0\n\t" // moves (ptr could be read-only) | |
361 "emms\n\t" // Empty mmx state/Reset FP regs | |
362 : "=m" (*ptr) | |
363 : "m" (value) | |
364 : // mark the FP stack and mmx registers as clobbered | |
365 "st", "st(1)", "st(2)", "st(3)", "st(4)", | |
366 "st(5)", "st(6)", "st(7)", "mm0", "mm1", | |
367 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); | |
368 } | |
369 | |
370 inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { | |
371 NoBarrier_Store(ptr, value); | |
372 MemoryBarrier(); | |
373 } | |
374 | |
375 inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { | |
376 ATOMICOPS_COMPILER_BARRIER(); | |
377 NoBarrier_Store(ptr, value); | |
378 } | |
379 | |
380 inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { | |
381 Atomic64 value; | |
382 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic | |
383 "movq %%mm0, %0\n\t" // moves (ptr could be read-only) | |
384 "emms\n\t" // Empty mmx state/Reset FP regs | |
385 : "=m" (value) | |
386 : "m" (*ptr) | |
387 : // mark the FP stack and mmx registers as clobbered | |
388 "st", "st(1)", "st(2)", "st(3)", "st(4)", | |
389 "st(5)", "st(6)", "st(7)", "mm0", "mm1", | |
390 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); | |
391 return value; | |
392 } | |
393 | |
394 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { | |
395 Atomic64 value = NoBarrier_Load(ptr); | |
396 ATOMICOPS_COMPILER_BARRIER(); | |
397 return value; | |
398 } | |
399 | |
400 inline Atomic64 Release_Load(volatile const Atomic64* ptr) { | |
401 MemoryBarrier(); | |
402 return NoBarrier_Load(ptr); | |
403 } | |
404 | |
405 #endif // defined(__x86_64__) | |
406 | |
407 inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, | |
408 Atomic64 old_value, | |
409 Atomic64 new_value) { | |
410 Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
411 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
412 __asm__ __volatile__("lfence" : : : "memory"); | |
413 } | |
414 return x; | |
415 } | |
416 | |
417 inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, | |
418 Atomic64 old_value, | |
419 Atomic64 new_value) { | |
420 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
421 } | |
422 | |
423 } // namespace base::subtle | |
424 } // namespace base | |
425 | |
426 #undef ATOMICOPS_COMPILER_BARRIER | |
427 | |
428 #endif // BASE_ATOMICOPS_INTERNALS_X86_H_ | |
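Below is a minimal usage sketch, not part of the file above: it shows how the Release_Store()/Acquire_Load() pair declared in this header is typically used to publish data from one thread to another. It assumes, as the header's comments direct, that clients include "base/atomicops.h" rather than this internal file; the g_payload/g_ready variables and the Publish()/TryConsume() helpers are illustrative names only.

#include "base/atomicops.h"   // public wrapper; clients must not include the x86 file directly

static int g_payload = 0;     // ordinary data being published
static Atomic32 g_ready = 0;  // publication flag; Atomic32 is the global typedef above

void Publish(int value) {
  g_payload = value;                          // 1. write the data
  base::subtle::Release_Store(&g_ready, 1);   // 2. release-store the flag, so the
                                              //    data write is ordered before it
}

bool TryConsume(int* out) {
  if (base::subtle::Acquire_Load(&g_ready)) { // acquire-load pairs with the
    *out = g_payload;                         // release-store above, so the
    return true;                              // payload write is visible here
  }
  return false;
}

On x86 the plain store and load themselves supply the hardware ordering, as the comments in Release_Store() and Acquire_Load() explain; the ATOMICOPS_COMPILER_BARRIER() calls only prevent the compiler from reordering memory accesses around them.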