/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations for x86.  This file should not
// be included directly.  Clients should instead include
// "base/atomicops.h".

#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_

typedef int32_t Atomic32;
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*


// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
// already matches Atomic32 or Atomic64, depending on the platform.


// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2.
  bool has_cmpxchg16b;      // Processor supports cmpxchg16b instruction.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;


#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")


namespace base {
namespace subtle {

typedef int64_t Atomic64;

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
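
// Illustrative sketch (not part of this file's API): a minimal reference
// count built on the increment operations above.  "RefCounted", "Ref" and
// "Unref" are hypothetical names used only for this example.  Unref() uses
// the Barrier_ variant so the decrement is ordered before any destruction
// it triggers.
//
//   struct RefCounted {
//     Atomic32 refs;  // Starts at 1, owned by the creator.
//   };
//
//   inline void Ref(RefCounted* p) {
//     NoBarrier_AtomicIncrement(&p->refs, 1);
//   }
//
//   inline void Unref(RefCounted* p) {
//     if (Barrier_AtomicIncrement(&p->refs, -1) == 0) {
//       delete p;  // Last reference gone; no other thread can touch *p.
//     }
//   }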

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
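
// Illustrative sketch (not part of this file's API): a test-and-set spinlock
// using the compare-and-swap variants above.  "SpinLockAcquire" and
// "SpinLockRelease" are hypothetical names.  Acquiring uses
// Acquire_CompareAndSwap so the critical section cannot float above the
// lock; releasing uses Release_Store (defined further below) so writes made
// while holding the lock are published before the lock word is cleared.
//
//   inline void SpinLockAcquire(volatile Atomic32* lock) {
//     while (Acquire_CompareAndSwap(lock, 0, 1) != 0) {
//       // Busy-wait; a production lock would back off or yield here.
//     }
//   }
//
//   inline void SpinLockRelease(volatile Atomic32* lock) {
//     Release_Store(lock, 0);
//   }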

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
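
// Illustrative sketch (not part of this file's API): the publish/consume
// pattern that Release_Store() and Acquire_Load() are meant to support.
// "payload" and "ready" are hypothetical variables.
//
//   int payload = 0;
//   Atomic32 ready = 0;
//
//   void Publish() {
//     payload = 42;              // Plain write to the data being handed off.
//     Release_Store(&ready, 1);  // The payload write cannot sink below this.
//   }
//
//   void Consume() {
//     if (Acquire_Load(&ready) == 1) {
//       int v = payload;         // Guaranteed to observe payload == 42.
//       (void)v;
//     }
//   }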

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else  // defined(__x86_64__)

// 64-bit low-level operations on 32-bit platform.

#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
// For compilers older than gcc 4.1, we use inline asm.
//
// Potential pitfalls:
//
// 1. %ebx points to Global offset table (GOT) with -fPIC.
//    We need to preserve this register.
// 2. When explicit registers are used in inline asm, the
//    compiler may not be aware of it and might try to reuse
//    the same register for another argument which has constraints
//    that allow it ("r" for example).

inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"    // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)              // as contents of ptr:
                       : "D" (ptr),               //   ecx:ebx => ptr
                         "0" (old_value),         // else:
                         "S" (&new_value)         //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
#endif  // Compiler < gcc-4.1

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_val,
                                         Atomic64 new_val) {
  return __sync_val_compare_and_swap(ptr, old_val, new_val);
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_val) {
  Atomic64 old_val;

  do {
    old_val = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 old_val, new_val;

  do {
    old_val = *ptr;
    new_val = old_val + increment;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (*ptr)
                       : "m" (value)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 value;
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (value)
                       : "m" (*ptr)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
  return value;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(__x86_64__)

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_