| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 473 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 484 MemA->getIndex() == MemB->getIndex() && | 484 MemA->getIndex() == MemB->getIndex() && |
| 485 MemA->getShift() == MemB->getShift() && | 485 MemA->getShift() == MemB->getShift() && |
| 486 MemA->getSegmentRegister() == MemB->getSegmentRegister(); | 486 MemA->getSegmentRegister() == MemB->getSegmentRegister(); |
| 487 } | 487 } |
| 488 } | 488 } |
| 489 return false; | 489 return false; |
| 490 } | 490 } |
| 491 | 491 |
| 492 template <class Machine> void TargetX86Base<Machine>::findRMW() { | 492 template <class Machine> void TargetX86Base<Machine>::findRMW() { |
| 493 Func->dump("Before RMW"); | 493 Func->dump("Before RMW"); |
| 494 OstreamLocker L(Func->getContext()); | 494 if (Func->isVerbose(IceV_RMW)) |
| 495 Ostream &Str = Func->getContext()->getStrDump(); | 495 Func->getContext()->lockStr(); |
| 496 for (CfgNode *Node : Func->getNodes()) { | 496 for (CfgNode *Node : Func->getNodes()) { |
| 497 // Walk through the instructions, considering each sequence of 3 | 497 // Walk through the instructions, considering each sequence of 3 |
| 498 // instructions, and look for the particular RMW pattern. Note that this | 498 // instructions, and look for the particular RMW pattern. Note that this |
| 499 // search can be "broken" (false negatives) if there are intervening | 499 // search can be "broken" (false negatives) if there are intervening |
| 500 // deleted instructions, or intervening instructions that could be safely | 500 // deleted instructions, or intervening instructions that could be safely |
| 501 // moved out of the way to reveal an RMW pattern. | 501 // moved out of the way to reveal an RMW pattern. |
| 502 auto E = Node->getInsts().end(); | 502 auto E = Node->getInsts().end(); |
| 503 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); | 503 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); |
| 504 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { | 504 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { |
| 505 // Make I3 skip over deleted instructions. | 505 // Make I3 skip over deleted instructions. |
| 506 while (I3 != E && I3->isDeleted()) | 506 while (I3 != E && I3->isDeleted()) |
| 507 ++I3; | 507 ++I3; |
| 508 if (I1 == E || I2 == E || I3 == E) | 508 if (I1 == E || I2 == E || I3 == E) |
| 509 continue; | 509 continue; |
| 510 assert(!I1->isDeleted()); | 510 assert(!I1->isDeleted()); |
| 511 assert(!I2->isDeleted()); | 511 assert(!I2->isDeleted()); |
| 512 assert(!I3->isDeleted()); | 512 assert(!I3->isDeleted()); |
| 513 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { | 513 auto *Load = llvm::dyn_cast<InstLoad>(I1); |
| 514 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { | 514 auto *Arith = llvm::dyn_cast<InstArithmetic>(I2); |
| 515 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { | 515 auto *Store = llvm::dyn_cast<InstStore>(I3); |
| 516 // Look for: | 516 if (!Load || !Arith || !Store) |
| 517 // a = Load addr | 517 continue; |
| 518 // b = <op> a, other | 518 // Look for: |
| 519 // Store b, addr | 519 // a = Load addr |
| 520 // Change to: | 520 // b = <op> a, other |
| 521 // a = Load addr | 521 // Store b, addr |
| 522 // b = <op> a, other | 522 // Change to: |
| 523 // x = FakeDef | 523 // a = Load addr |
| 524 // RMW <op>, addr, other, x | 524 // b = <op> a, other |
| 525 // b = Store b, addr, x | 525 // x = FakeDef |
| 526 // Note that inferTwoAddress() makes sure setDestNonKillable() gets | 526 // RMW <op>, addr, other, x |
| 527 // called on the updated Store instruction, to avoid liveness | 527 // b = Store b, addr, x |
| 528 // problems later. | 528 // Note that inferTwoAddress() makes sure setDestNonKillable() gets |
| 529 // | 529 // called on the updated Store instruction, to avoid liveness problems |
| 530 // With this transformation, the Store instruction acquires a Dest | 530 // later. |
| 531 // variable and is now subject to dead code elimination if there | 531 // |
| 532 // are no more uses of "b". Variable "x" is a beacon for | 532 // With this transformation, the Store instruction acquires a Dest |
| 533 // determining whether the Store instruction gets dead-code | 533 // variable and is now subject to dead code elimination if there are no |
| 534 // eliminated. If the Store instruction is eliminated, then it | 534 // more uses of "b". Variable "x" is a beacon for determining whether |
| 535 // must be the case that the RMW instruction ends x's live range, | 535 // the Store instruction gets dead-code eliminated. If the Store |
| 536 // and therefore the RMW instruction will be retained and later | 536 // instruction is eliminated, then it must be the case that the RMW |
| 537 // lowered. On the other hand, if the RMW instruction does not end | 537 // instruction ends x's live range, and therefore the RMW instruction |
| 538 // x's live range, then the Store instruction must still be | 538 // will be retained and later lowered. On the other hand, if the RMW |
| 539 // present, and therefore the RMW instruction is ignored during | 539 // instruction does not end x's live range, then the Store instruction |
| 540 // lowering because it is redundant with the Store instruction. | 540 // must still be present, and therefore the RMW instruction is ignored |
| 541 // | 541 // during lowering because it is redundant with the Store instruction. |
| 542 // Note that if "a" has further uses, the RMW transformation may | 542 // |
| 543 // still trigger, resulting in two loads and one store, which is | 543 // Note that if "a" has further uses, the RMW transformation may still |
| 544 // worse than the original one load and one store. However, this | 544 // trigger, resulting in two loads and one store, which is worse than the |
| 545 // is probably rare, and caching probably keeps it just as fast. | 545 // original one load and one store. However, this is probably rare, and |
| 546 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), | 546 // caching probably keeps it just as fast. |
| 547 Store->getAddr())) | 547 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), |
| 548 continue; | 548 Store->getAddr())) |
| 549 Operand *ArithSrcFromLoad = Arith->getSrc(0); | 549 continue; |
| 550 Operand *ArithSrcOther = Arith->getSrc(1); | 550 Operand *ArithSrcFromLoad = Arith->getSrc(0); |
| 551 if (ArithSrcFromLoad != Load->getDest()) { | 551 Operand *ArithSrcOther = Arith->getSrc(1); |
| 552 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) | 552 if (ArithSrcFromLoad != Load->getDest()) { |
| 553 continue; | 553 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) |
| 554 std::swap(ArithSrcFromLoad, ArithSrcOther); | 554 continue; |
| 555 } | 555 std::swap(ArithSrcFromLoad, ArithSrcOther); |
| 556 if (Arith->getDest() != Store->getData()) | |
| 557 continue; | |
| 558 if (!canRMW(Arith)) | |
| 559 continue; | |
| 560 if (Func->isVerbose(IceV_RMW)) { | |
| 561 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; | |
| 562 Load->dump(Func); | |
| 563 Str << "\n "; | |
| 564 Arith->dump(Func); | |
| 565 Str << "\n "; | |
| 566 Store->dump(Func); | |
| 567 Str << "\n"; | |
| 568 } | |
| 569 Variable *Beacon = Func->makeVariable(IceType_i32); | |
| 570 Beacon->setMustNotHaveReg(); | |
| 571 Store->setRmwBeacon(Beacon); | |
| 572 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); | |
| 573 Node->getInsts().insert(I3, BeaconDef); | |
| 574 auto *RMW = Traits::Insts::FakeRMW::create( | |
| 575 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); | |
| 576 Node->getInsts().insert(I3, RMW); | |
| 577 } | |
| 578 } | |
| 579 } | 556 } |
| 557 if (Arith->getDest() != Store->getData()) |
| 558 continue; |
| 559 if (!canRMW(Arith)) |
| 560 continue; |
| 561 if (Func->isVerbose(IceV_RMW)) { |
| 562 Ostream &Str = Func->getContext()->getStrDump(); |
| 563 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; |
| 564 Load->dump(Func); |
| 565 Str << "\n "; |
| 566 Arith->dump(Func); |
| 567 Str << "\n "; |
| 568 Store->dump(Func); |
| 569 Str << "\n"; |
| 570 } |
| 571 Variable *Beacon = Func->makeVariable(IceType_i32); |
| 572 Beacon->setMustNotHaveReg(); |
| 573 Store->setRmwBeacon(Beacon); |
| 574 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); |
| 575 Node->getInsts().insert(I3, BeaconDef); |
| 576 auto *RMW = Traits::Insts::FakeRMW::create( |
| 577 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); |
| 578 Node->getInsts().insert(I3, RMW); |
| 580 } | 579 } |
| 581 } | 580 } |
| 581 if (Func->isVerbose(IceV_RMW)) |
| 582 Func->getContext()->unlockStr(); |
| 582 } | 583 } |
| 583 | 584 |
| 584 // Converts a ConstantInteger32 operand into its constant value, or | 585 // Converts a ConstantInteger32 operand into its constant value, or |
| 585 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 586 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 586 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 587 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 587 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 588 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 588 return Integer->getValue(); | 589 return Integer->getValue(); |
| 589 return Intrinsics::MemoryOrderInvalid; | 590 return Intrinsics::MemoryOrderInvalid; |
| 590 } | 591 } |
| 591 | 592 |
| (...skipping 4801 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5393 } | 5394 } |
| 5394 // the offset is not eligible for blinding or pooling, return the original | 5395 // the offset is not eligible for blinding or pooling, return the original |
| 5395 // mem operand | 5396 // mem operand |
| 5396 return MemOperand; | 5397 return MemOperand; |
| 5397 } | 5398 } |
| 5398 | 5399 |
| 5399 } // end of namespace X86Internal | 5400 } // end of namespace X86Internal |
| 5400 } // end of namespace Ice | 5401 } // end of namespace Ice |
| 5401 | 5402 |
| 5402 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5403 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |