| OLD | NEW |
| 1 //===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// | 1 //===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// |
| 2 // | 2 // |
| 3 // The LLVM Compiler Infrastructure | 3 // The LLVM Compiler Infrastructure |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file contains a pass (at IR level) to replace atomic instructions with | 10 // This file contains a pass (at IR level) to replace atomic instructions with |
| 11 // either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. | 11 // either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. |
| 12 // | 12 // |
| 13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
| 14 | 14 |
| 15 #include "llvm/CodeGen/AtomicExpandUtils.h" |
| 15 #include "llvm/CodeGen/Passes.h" | 16 #include "llvm/CodeGen/Passes.h" |
| 16 #include "llvm/IR/Function.h" | 17 #include "llvm/IR/Function.h" |
| 17 #include "llvm/IR/IRBuilder.h" | 18 #include "llvm/IR/IRBuilder.h" |
| 18 #include "llvm/IR/InstIterator.h" | 19 #include "llvm/IR/InstIterator.h" |
| 19 #include "llvm/IR/Instructions.h" | 20 #include "llvm/IR/Instructions.h" |
| 20 #include "llvm/IR/Intrinsics.h" | 21 #include "llvm/IR/Intrinsics.h" |
| 21 #include "llvm/IR/Module.h" | 22 #include "llvm/IR/Module.h" |
| 22 #include "llvm/Support/Debug.h" | 23 #include "llvm/Support/Debug.h" |
| 23 #include "llvm/Target/TargetLowering.h" | 24 #include "llvm/Target/TargetLowering.h" |
| 24 #include "llvm/Target/TargetMachine.h" | 25 #include "llvm/Target/TargetMachine.h" |
| (...skipping 18 matching lines...) |
| 43 | 44 |
| 44 private: | 45 private: |
| 45 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, | 46 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, |
| 46 bool IsStore, bool IsLoad); | 47 bool IsStore, bool IsLoad); |
| 47 bool expandAtomicLoad(LoadInst *LI); | 48 bool expandAtomicLoad(LoadInst *LI); |
| 48 bool expandAtomicLoadToLL(LoadInst *LI); | 49 bool expandAtomicLoadToLL(LoadInst *LI); |
| 49 bool expandAtomicLoadToCmpXchg(LoadInst *LI); | 50 bool expandAtomicLoadToCmpXchg(LoadInst *LI); |
| 50 bool expandAtomicStore(StoreInst *SI); | 51 bool expandAtomicStore(StoreInst *SI); |
| 51 bool tryExpandAtomicRMW(AtomicRMWInst *AI); | 52 bool tryExpandAtomicRMW(AtomicRMWInst *AI); |
| 52 bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); | 53 bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); |
| 53 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); | |
| 54 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); | 54 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); |
| 55 bool isIdempotentRMW(AtomicRMWInst *AI); | 55 bool isIdempotentRMW(AtomicRMWInst *AI); |
| 56 bool simplifyIdempotentRMW(AtomicRMWInst *AI); | 56 bool simplifyIdempotentRMW(AtomicRMWInst *AI); |
| 57 }; | 57 }; |
| 58 } | 58 } |
| 59 | 59 |
| 60 char AtomicExpand::ID = 0; | 60 char AtomicExpand::ID = 0; |
| 61 char &llvm::AtomicExpandID = AtomicExpand::ID; | 61 char &llvm::AtomicExpandID = AtomicExpand::ID; |
| 62 INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", | 62 INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", |
| 63 "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", | 63 "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", |
| (...skipping 155 matching lines...) |
| 219 IRBuilder<> Builder(SI); | 219 IRBuilder<> Builder(SI); |
| 220 AtomicRMWInst *AI = | 220 AtomicRMWInst *AI = |
| 221 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), | 221 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), |
| 222 SI->getValueOperand(), SI->getOrdering()); | 222 SI->getValueOperand(), SI->getOrdering()); |
| 223 SI->eraseFromParent(); | 223 SI->eraseFromParent(); |
| 224 | 224 |
| 225 // Now we have an appropriate swap instruction, lower it as usual. | 225 // Now we have an appropriate swap instruction, lower it as usual. |
| 226 return tryExpandAtomicRMW(AI); | 226 return tryExpandAtomicRMW(AI); |
| 227 } | 227 } |
| 228 | 228 |
| 229 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, |
| 230 Value *Loaded, Value *NewVal, |
| 231 AtomicOrdering MemOpOrder, |
| 232 Value *&Success, Value *&NewLoaded) { |
| 233 Value* Pair = Builder.CreateAtomicCmpXchg( |
| 234 Addr, Loaded, NewVal, MemOpOrder, |
| 235 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); |
| 236 Success = Builder.CreateExtractValue(Pair, 1, "success"); |
| 237 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); |
| 238 } |
| 239 |
| 229 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { | 240 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { |
| 230 switch (TLI->shouldExpandAtomicRMWInIR(AI)) { | 241 switch (TLI->shouldExpandAtomicRMWInIR(AI)) { |
| 231 case TargetLoweringBase::AtomicRMWExpansionKind::None: | 242 case TargetLoweringBase::AtomicRMWExpansionKind::None: |
| 232 return false; | 243 return false; |
| 233 case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { | 244 case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { |
| 234 assert(TLI->hasLoadLinkedStoreConditional() && | 245 assert(TLI->hasLoadLinkedStoreConditional() && |
| 235 "TargetLowering requested we expand AtomicRMW instruction into " | 246 "TargetLowering requested we expand AtomicRMW instruction into " |
| 236 "load-linked/store-conditional combos, but such instructions aren't " | 247 "load-linked/store-conditional combos, but such instructions aren't " |
| 237 "supported"); | 248 "supported"); |
| 238 | 249 |
| 239 return expandAtomicRMWToLLSC(AI); | 250 return expandAtomicRMWToLLSC(AI); |
| 240 } | 251 } |
| 241 case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { | 252 case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { |
| 242 return expandAtomicRMWToCmpXchg(AI); | 253 return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); |
| 243 } | 254 } |
| 244 } | 255 } |
| 245 llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); | 256 llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); |
| 246 } | 257 } |
| 247 | 258 |
| 248 /// Emit IR to implement the given atomicrmw operation on values in registers, | 259 /// Emit IR to implement the given atomicrmw operation on values in registers, |
| 249 /// returning the new value. | 260 /// returning the new value. |
| 250 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, | 261 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, |
| 251 Value *Loaded, Value *Inc) { | 262 Value *Loaded, Value *Inc) { |
| 252 Value *NewVal; | 263 Value *NewVal; |
| (...skipping 77 matching lines...) |
| 330 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); | 341 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); |
| 331 | 342 |
| 332 Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | 343 Builder.SetInsertPoint(ExitBB, ExitBB->begin()); |
| 333 | 344 |
| 334 AI->replaceAllUsesWith(Loaded); | 345 AI->replaceAllUsesWith(Loaded); |
| 335 AI->eraseFromParent(); | 346 AI->eraseFromParent(); |
| 336 | 347 |
| 337 return true; | 348 return true; |
| 338 } | 349 } |
| 339 | 350 |
| 340 bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { | |
| 341 AtomicOrdering MemOpOrder = | |
| 342 AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); | |
| 343 Value *Addr = AI->getPointerOperand(); | |
| 344 BasicBlock *BB = AI->getParent(); | |
| 345 Function *F = BB->getParent(); | |
| 346 LLVMContext &Ctx = F->getContext(); | |
| 347 | |
| 348 // Given: atomicrmw some_op iN* %addr, iN %incr ordering | |
| 349 // | |
| 350 // The standard expansion we produce is: | |
| 351 // [...] | |
| 352 // %init_loaded = load atomic iN* %addr | |
| 353 // br label %loop | |
| 354 // loop: | |
| 355 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] | |
| 356 // %new = some_op iN %loaded, %incr | |
| 357 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new | |
| 358 // %new_loaded = extractvalue { iN, i1 } %pair, 0 | |
| 359 // %success = extractvalue { iN, i1 } %pair, 1 | |
| 360 // br i1 %success, label %atomicrmw.end, label %loop | |
| 361 // atomicrmw.end: | |
| 362 // [...] | |
| 363 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); | |
| 364 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); | |
| 365 | |
| 366 // This grabs the DebugLoc from AI. | |
| 367 IRBuilder<> Builder(AI); | |
| 368 | |
| 369 // The split call above "helpfully" added a branch at the end of BB (to the | |
| 370 // wrong place), but we want a load. It's easiest to just remove | |
| 371 // the branch entirely. | |
| 372 std::prev(BB->end())->eraseFromParent(); | |
| 373 Builder.SetInsertPoint(BB); | |
| 374 LoadInst *InitLoaded = Builder.CreateLoad(Addr); | |
| 375 // Atomics require at least natural alignment. | |
| 376 InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); | |
| 377 Builder.CreateBr(LoopBB); | |
| 378 | |
| 379 // Start the main loop block now that we've taken care of the preliminaries. | |
| 380 Builder.SetInsertPoint(LoopBB); | |
| 381 PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); | |
| 382 Loaded->addIncoming(InitLoaded, BB); | |
| 383 | |
| 384 Value *NewVal = | |
| 385 performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); | |
| 386 | |
| 387 Value *Pair = Builder.CreateAtomicCmpXchg( | |
| 388 Addr, Loaded, NewVal, MemOpOrder, | |
| 389 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); | |
| 390 Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); | |
| 391 Loaded->addIncoming(NewLoaded, LoopBB); | |
| 392 | |
| 393 Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); | |
| 394 Builder.CreateCondBr(Success, ExitBB, LoopBB); | |
| 395 | |
| 396 Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |
| 397 | |
| 398 AI->replaceAllUsesWith(NewLoaded); | |
| 399 AI->eraseFromParent(); | |
| 400 | |
| 401 return true; | |
| 402 } | |
| 403 | |
| 404 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | 351 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { |
| 405 AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); | 352 AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); |
| 406 AtomicOrdering FailureOrder = CI->getFailureOrdering(); | 353 AtomicOrdering FailureOrder = CI->getFailureOrdering(); |
| 407 Value *Addr = CI->getPointerOperand(); | 354 Value *Addr = CI->getPointerOperand(); |
| 408 BasicBlock *BB = CI->getParent(); | 355 BasicBlock *BB = CI->getParent(); |
| 409 Function *F = BB->getParent(); | 356 Function *F = BB->getParent(); |
| 410 LLVMContext &Ctx = F->getContext(); | 357 LLVMContext &Ctx = F->getContext(); |
| 411 // If getInsertFencesForAtomic() returns true, then the target does not want | 358 // If getInsertFencesForAtomic() returns true, then the target does not want |
| 412 // to deal with memory orders, and emitLeading/TrailingFence should take care | 359 // to deal with memory orders, and emitLeading/TrailingFence should take care |
| 413 // of everything. Otherwise, emitLeading/TrailingFence are no-op and we | 360 // of everything. Otherwise, emitLeading/TrailingFence are no-op and we |
| (...skipping 141 matching lines...) |
| 555 } | 502 } |
| 556 | 503 |
| 557 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { | 504 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { |
| 558 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { | 505 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { |
| 559 if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) | 506 if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) |
| 560 expandAtomicLoad(ResultingLoad); | 507 expandAtomicLoad(ResultingLoad); |
| 561 return true; | 508 return true; |
| 562 } | 509 } |
| 563 return false; | 510 return false; |
| 564 } | 511 } |
| 512 |
| 513 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, |
| 514 CreateCmpXchgInstFun CreateCmpXchg) { |
| 515 assert(AI); |
| 516 |
| 517 AtomicOrdering MemOpOrder = |
| 518 AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); |
| 519 Value *Addr = AI->getPointerOperand(); |
| 520 BasicBlock *BB = AI->getParent(); |
| 521 Function *F = BB->getParent(); |
| 522 LLVMContext &Ctx = F->getContext(); |
| 523 |
| 524 // Given: atomicrmw some_op iN* %addr, iN %incr ordering |
| 525 // |
| 526 // The standard expansion we produce is: |
| 527 // [...] |
| 528 // %init_loaded = load atomic iN* %addr |
| 529 // br label %loop |
| 530 // loop: |
| 531 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] |
| 532 // %new = some_op iN %loaded, %incr |
| 533 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new |
| 534 // %new_loaded = extractvalue { iN, i1 } %pair, 0 |
| 535 // %success = extractvalue { iN, i1 } %pair, 1 |
| 536 // br i1 %success, label %atomicrmw.end, label %loop |
| 537 // atomicrmw.end: |
| 538 // [...] |
| 539 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); |
| 540 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); |
| 541 |
| 542 // This grabs the DebugLoc from AI. |
| 543 IRBuilder<> Builder(AI); |
| 544 |
| 545 // The split call above "helpfully" added a branch at the end of BB (to the |
| 546 // wrong place), but we want a load. It's easiest to just remove |
| 547 // the branch entirely. |
| 548 std::prev(BB->end())->eraseFromParent(); |
| 549 Builder.SetInsertPoint(BB); |
| 550 LoadInst *InitLoaded = Builder.CreateLoad(Addr); |
| 551 // Atomics require at least natural alignment. |
| 552 InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); |
| 553 Builder.CreateBr(LoopBB); |
| 554 |
| 555 // Start the main loop block now that we've taken care of the preliminaries. |
| 556 Builder.SetInsertPoint(LoopBB); |
| 557 PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); |
| 558 Loaded->addIncoming(InitLoaded, BB); |
| 559 |
| 560 Value *NewVal = |
| 561 performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); |
| 562 |
| 563 Value *NewLoaded = nullptr; |
| 564 Value *Success = nullptr; |
| 565 |
| 566 CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, |
| 567 Success, NewLoaded); |
| 568 assert(Success && NewLoaded); |
| 569 |
| 570 Loaded->addIncoming(NewLoaded, LoopBB); |
| 571 |
| 572 Builder.CreateCondBr(Success, ExitBB, LoopBB); |
| 573 |
| 574 Builder.SetInsertPoint(ExitBB, ExitBB->begin()); |
| 575 |
| 576 AI->replaceAllUsesWith(NewLoaded); |
| 577 AI->eraseFromParent(); |
| 578 |
| 579 return true; |
| 580 } |
| OLD | NEW |