OLD | NEW |
| (Empty) |
1 /* Copyright (c) 2006, Google Inc. | |
2 * All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 * | |
30 * --- | |
31 * Author: Sanjay Ghemawat | |
32 */ | |
33 | |
34 // Implementation of atomic operations for x86. This file should not | |
35 // be included directly. Clients should instead include | |
36 // "base/atomicops.h". | |
37 | |
38 #ifndef BASE_ATOMICOPS_INTERNALS_X86_H_ | |
39 #define BASE_ATOMICOPS_INTERNALS_X86_H_ | |
40 | |
41 typedef int32_t Atomic32; | |
42 #define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* | |
43 | |
44 | |
45 // NOTE(vchen): x86 does not need to define AtomicWordCastType, because it | |
46 // already matches Atomic32 or Atomic64, depending on the platform. | |
47 | |
48 | |
49 // This struct is not part of the public API of this module; clients may not | |
50 // use it. | |
51 // Features of this x86 CPU. Values may not be correct before main() is run, | |
52 // but are set conservatively. | |
53 struct AtomicOps_x86CPUFeatureStruct { | |
54 bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence | |
55 // after acquire compare-and-swap. | |
56 bool has_sse2; // Processor has SSE2. | |
57 bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction. | |
58 }; | |
59 extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures; | |
60 | |
61 | |
62 #define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") | |
63 | |
64 | |
65 namespace base { | |
66 namespace subtle { | |
67 | |
68 typedef int64_t Atomic64; | |
69 | |
70 // 32-bit low-level operations on any platform. | |
71 | |
72 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, | |
73 Atomic32 old_value, | |
74 Atomic32 new_value) { | |
75 Atomic32 prev; | |
76 __asm__ __volatile__("lock; cmpxchgl %1,%2" | |
77 : "=a" (prev) | |
78 : "q" (new_value), "m" (*ptr), "0" (old_value) | |
79 : "memory"); | |
80 return prev; | |
81 } | |
82 | |
83 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, | |
84 Atomic32 new_value) { | |
85 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. | |
86 : "=r" (new_value) | |
87 : "m" (*ptr), "0" (new_value) | |
88 : "memory"); | |
89 return new_value; // Now it's the previous value. | |
90 } | |
91 | |
92 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, | |
93 Atomic32 increment) { | |
94 Atomic32 temp = increment; | |
95 __asm__ __volatile__("lock; xaddl %0,%1" | |
96 : "+r" (temp), "+m" (*ptr) | |
97 : : "memory"); | |
98 // temp now holds the old value of *ptr | |
99 return temp + increment; | |
100 } | |
101 | |
102 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, | |
103 Atomic32 increment) { | |
104 Atomic32 temp = increment; | |
105 __asm__ __volatile__("lock; xaddl %0,%1" | |
106 : "+r" (temp), "+m" (*ptr) | |
107 : : "memory"); | |
108 // temp now holds the old value of *ptr | |
109 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
110 __asm__ __volatile__("lfence" : : : "memory"); | |
111 } | |
112 return temp + increment; | |
113 } | |
114 | |
115 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, | |
116 Atomic32 old_value, | |
117 Atomic32 new_value) { | |
118 Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
119 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
120 __asm__ __volatile__("lfence" : : : "memory"); | |
121 } | |
122 return x; | |
123 } | |
124 | |
125 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, | |
126 Atomic32 old_value, | |
127 Atomic32 new_value) { | |
128 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
129 } | |
130 | |
131 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { | |
132 *ptr = value; | |
133 } | |
134 | |
135 #if defined(__x86_64__) | |
136 | |
137 // 64-bit implementations of memory barrier can be simpler, because | |
138 // "mfence" is guaranteed to exist. | |
139 inline void MemoryBarrier() { | |
140 __asm__ __volatile__("mfence" : : : "memory"); | |
141 } | |
142 | |
143 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | |
144 *ptr = value; | |
145 MemoryBarrier(); | |
146 } | |
147 | |
148 #else | |
149 | |
150 inline void MemoryBarrier() { | |
151 if (AtomicOps_Internalx86CPUFeatures.has_sse2) { | |
152 __asm__ __volatile__("mfence" : : : "memory"); | |
153 } else { // mfence is faster but not present on PIII | |
154 Atomic32 x = 0; | |
155 NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII | |
156 } | |
157 } | |
158 | |
159 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { | |
160 if (AtomicOps_Internalx86CPUFeatures.has_sse2) { | |
161 *ptr = value; | |
162 __asm__ __volatile__("mfence" : : : "memory"); | |
163 } else { | |
164 NoBarrier_AtomicExchange(ptr, value); | |
165 // acts as a barrier on PIII | |
166 } | |
167 } | |
168 #endif | |
169 | |
170 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { | |
171 ATOMICOPS_COMPILER_BARRIER(); | |
172 *ptr = value; // An x86 store acts as a release barrier. | |
173 // See comments in Atomic64 version of Release_Store(), below. | |
174 } | |
175 | |
176 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { | |
177 return *ptr; | |
178 } | |
179 | |
180 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { | |
181 Atomic32 value = *ptr; // An x86 load acts as an acquire barrier. | |
182 // See comments in Atomic64 version of Release_Store(), below. | |
183 ATOMICOPS_COMPILER_BARRIER(); | |
184 return value; | |
185 } | |
186 | |
187 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { | |
188 MemoryBarrier(); | |
189 return *ptr; | |
190 } | |
191 | |
192 #if defined(__x86_64__) | |
193 | |
194 // 64-bit low-level operations on 64-bit platform. | |
195 | |
196 inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, | |
197 Atomic64 old_value, | |
198 Atomic64 new_value) { | |
199 Atomic64 prev; | |
200 __asm__ __volatile__("lock; cmpxchgq %1,%2" | |
201 : "=a" (prev) | |
202 : "q" (new_value), "m" (*ptr), "0" (old_value) | |
203 : "memory"); | |
204 return prev; | |
205 } | |
206 | |
207 inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, | |
208 Atomic64 new_value) { | |
209 __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg. | |
210 : "=r" (new_value) | |
211 : "m" (*ptr), "0" (new_value) | |
212 : "memory"); | |
213 return new_value; // Now it's the previous value. | |
214 } | |
215 | |
216 inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, | |
217 Atomic64 increment) { | |
218 Atomic64 temp = increment; | |
219 __asm__ __volatile__("lock; xaddq %0,%1" | |
220 : "+r" (temp), "+m" (*ptr) | |
221 : : "memory"); | |
222 // temp now contains the previous value of *ptr | |
223 return temp + increment; | |
224 } | |
225 | |
226 inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, | |
227 Atomic64 increment) { | |
228 Atomic64 temp = increment; | |
229 __asm__ __volatile__("lock; xaddq %0,%1" | |
230 : "+r" (temp), "+m" (*ptr) | |
231 : : "memory"); | |
232 // temp now contains the previous value of *ptr | |
233 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
234 __asm__ __volatile__("lfence" : : : "memory"); | |
235 } | |
236 return temp + increment; | |
237 } | |
238 | |
239 inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { | |
240 *ptr = value; | |
241 } | |
242 | |
243 inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { | |
244 *ptr = value; | |
245 MemoryBarrier(); | |
246 } | |
247 | |
248 inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { | |
249 ATOMICOPS_COMPILER_BARRIER(); | |
250 | |
251 *ptr = value; // An x86 store acts as a release barrier | |
252 // for current AMD/Intel chips as of Jan 2008. | |
253 // See also Acquire_Load(), below. | |
254 | |
255 // When new chips come out, check: | |
256 // IA-32 Intel Architecture Software Developer's Manual, Volume 3: | |
257 // System Programming Guide, Chapter 7: Multiple-Processor Management, | |
258 // Section 7.2, Memory Ordering. | |
259 // Last seen at: | |
260 // http://developer.intel.com/design/pentium4/manuals/index_new.htm | |
261 // | |
262 // x86 stores/loads fail to act as barriers for a few instructions (clflush | |
263 // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are | |
264 // not generated by the compiler, and are rare. Users of these instructions | |
265 // need to know about cache behaviour in any case since all of these involve | |
266 // either flushing cache lines or non-temporal cache hints. | |
267 } | |
268 | |
269 inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { | |
270 return *ptr; | |
271 } | |
272 | |
273 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { | |
274 Atomic64 value = *ptr; // An x86 load acts as an acquire barrier, | |
275 // for current AMD/Intel chips as of Jan 2008. | |
276 // See also Release_Store(), above. | |
277 ATOMICOPS_COMPILER_BARRIER(); | |
278 return value; | |
279 } | |
280 | |
281 inline Atomic64 Release_Load(volatile const Atomic64* ptr) { | |
282 MemoryBarrier(); | |
283 return *ptr; | |
284 } | |
285 | |
286 #else // defined(__x86_64__) | |
287 | |
288 // 64-bit low-level operations on 32-bit platform. | |
289 | |
290 #if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) | |
291 // For compilers older than gcc 4.1, we use inline asm. | |
292 // | |
293 // Potential pitfalls: | |
294 // | |
295 // 1. %ebx points to the Global Offset Table (GOT) with -fPIC. | |
296 // We need to preserve this register. | |
297 // 2. When explicit registers are used in inline asm, the | |
298 // compiler may not be aware of it and might try to reuse | |
299 // the same register for another argument which has constraints | |
300 // that allow it ("r" for example). | |
301 | |
302 inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, | |
303 Atomic64 old_value, | |
304 Atomic64 new_value) { | |
305 Atomic64 prev; | |
306 __asm__ __volatile__("push %%ebx\n\t" | |
307 "movl (%3), %%ebx\n\t" // Move 64-bit new_value into | |
308 "movl 4(%3), %%ecx\n\t" // ecx:ebx | |
309 "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same | |
310 "pop %%ebx\n\t" | |
311 : "=A" (prev) // as contents of ptr: | |
312 : "D" (ptr), // ecx:ebx => ptr | |
313 "0" (old_value), // else: | |
314 "S" (&new_value) // old *ptr => edx:eax | |
315 : "memory", "%ecx"); | |
316 return prev; | |
317 } | |
318 #endif // Compiler < gcc-4.1 | |
319 | |
320 inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, | |
321 Atomic64 old_val, | |
322 Atomic64 new_val) { | |
323 return __sync_val_compare_and_swap(ptr, old_val, new_val); | |
324 } | |
325 | |
326 inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, | |
327 Atomic64 new_val) { | |
328 Atomic64 old_val; | |
329 | |
330 do { | |
331 old_val = *ptr; | |
332 } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); | |
333 | |
334 return old_val; | |
335 } | |
336 | |
337 inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, | |
338 Atomic64 increment) { | |
339 Atomic64 old_val, new_val; | |
340 | |
341 do { | |
342 old_val = *ptr; | |
343 new_val = old_val + increment; | |
344 } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); | |
345 | |
346 return old_val + increment; | |
347 } | |
348 | |
349 inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, | |
350 Atomic64 increment) { | |
351 Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment); | |
352 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
353 __asm__ __volatile__("lfence" : : : "memory"); | |
354 } | |
355 return new_val; | |
356 } | |
357 | |
358 inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { | |
359 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic | |
360 "movq %%mm0, %0\n\t" // moves (ptr could be read-only) | |
361 "emms\n\t" // Empty mmx state/Reset FP regs | |
362 : "=m" (*ptr) | |
363 : "m" (value) | |
364 : // mark the FP stack and mmx registers as clobbered | |
365 "st", "st(1)", "st(2)", "st(3)", "st(4)", | |
366 "st(5)", "st(6)", "st(7)", "mm0", "mm1", | |
367 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); | |
368 } | |
369 | |
370 inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { | |
371 NoBarrier_Store(ptr, value); | |
372 MemoryBarrier(); | |
373 } | |
374 | |
375 inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { | |
376 ATOMICOPS_COMPILER_BARRIER(); | |
377 NoBarrier_Store(ptr, value); | |
378 } | |
379 | |
380 inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { | |
381 Atomic64 value; | |
382 __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic | |
383 "movq %%mm0, %0\n\t" // moves (ptr could be read-only) | |
384 "emms\n\t" // Empty mmx state/Reset FP regs | |
385 : "=m" (value) | |
386 : "m" (*ptr) | |
387 : // mark the FP stack and mmx registers as clobbered | |
388 "st", "st(1)", "st(2)", "st(3)", "st(4)", | |
389 "st(5)", "st(6)", "st(7)", "mm0", "mm1", | |
390 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); | |
391 return value; | |
392 } | |
393 | |
394 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { | |
395 Atomic64 value = NoBarrier_Load(ptr); | |
396 ATOMICOPS_COMPILER_BARRIER(); | |
397 return value; | |
398 } | |
399 | |
400 inline Atomic64 Release_Load(volatile const Atomic64* ptr) { | |
401 MemoryBarrier(); | |
402 return NoBarrier_Load(ptr); | |
403 } | |
404 | |
405 #endif // defined(__x86_64__) | |
406 | |
407 inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, | |
408 Atomic64 old_value, | |
409 Atomic64 new_value) { | |
410 Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
411 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { | |
412 __asm__ __volatile__("lfence" : : : "memory"); | |
413 } | |
414 return x; | |
415 } | |
416 | |
417 inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, | |
418 Atomic64 old_value, | |
419 Atomic64 new_value) { | |
420 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); | |
421 } | |
422 | |
423 } // namespace base::subtle | |
424 } // namespace base | |
425 | |
426 #undef ATOMICOPS_COMPILER_BARRIER | |
427 | |
428 #endif // BASE_ATOMICOPS_INTERNALS_X86_H_ | |
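Below is a minimal usage sketch, not part of the file above: it shows how the Release_Store()/Acquire_Load() pair declared in this header is typically used to publish data from one thread to another. It assumes, as the header's comments direct, that clients include "base/atomicops.h" rather than this internal file; the g_payload/g_ready variables and the Publish()/TryConsume() helpers are illustrative names only.

#include "base/atomicops.h"   // public wrapper; clients must not include the x86 file directly

static int g_payload = 0;     // ordinary data being published
static Atomic32 g_ready = 0;  // publication flag; Atomic32 is the global typedef above

void Publish(int value) {
  g_payload = value;                          // 1. write the data
  base::subtle::Release_Store(&g_ready, 1);   // 2. release-store the flag, so the
                                              //    data write is ordered before it
}

bool TryConsume(int* out) {
  if (base::subtle::Acquire_Load(&g_ready)) { // acquire-load pairs with the
    *out = g_payload;                         // release-store above, so the
    return true;                              // payload write is visible here
  }
  return false;
}

On x86 the plain store and load themselves supply the hardware ordering, as the comments in Release_Store() and Acquire_Load() explain; the ATOMICOPS_COMPILER_BARRIER() calls only prevent the compiler from reordering memory accesses around them.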