Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index b686579527410ac67ff980e967aa5e0e22f3b37e..f0981f9c05e761a96b60065890e9867c177dc0ea 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -461,6 +461,8 @@ void TargetX8632::translateO2() { |
Func->dump("After Phi lowering"); |
} |
+ findRMW(); |
+ |
// Address mode optimization. |
Func->getVMetadata()->init(VMK_SingleDefs); |
Func->doAddressOpt(); |
@@ -579,6 +581,120 @@ void TargetX8632::translateOm1() { |
namespace { |
+bool canRMW(const InstArithmetic *Arith) { |
+ Type Ty = Arith->getDest()->getType(); |
+ bool isI64 = Ty == IceType_i64; |
+ bool isVector = isVectorType(Ty); |
+ |
+ switch (Arith->getOp()) { |
+ default: |
+ return false; |
+ case InstArithmetic::Add: |
+ return !isI64 && !isVector; // TODO(stichnot): implement |
jvoung (off chromium)
2015/06/16 17:59:22
not TODO for Add anymore?
Jim Stichnoth
2015/06/17 00:15:40
I meant implement the i64 and vector variants. Cl
|
+ case InstArithmetic::Sub: |
+ case InstArithmetic::And: |
+ case InstArithmetic::Or: |
+ case InstArithmetic::Xor: |
+ case InstArithmetic::Fadd: |
+ case InstArithmetic::Fsub: |
+ case InstArithmetic::Fmul: |
+ case InstArithmetic::Fdiv: |
+ return false; // TODO(stichnot): implement |
+ return true; |
+ case InstArithmetic::Shl: |
+ case InstArithmetic::Lshr: |
+ case InstArithmetic::Ashr: |
+ return false; // TODO(stichnot): implement |
+ return !isI64 && !isVector; |
+ } |
+ // Not handled for lack of simple lowering: |
+ // shift on i64 and vectors |
jvoung (off chromium)
2015/06/16 17:59:22
Maybe put this earlier, to explain the "return !is
Jim Stichnoth
2015/06/17 00:15:40
Done.
|
+ // mul, udiv, urem, sdiv, srem, frem |
+} |
+ |
+} // end of anonymous namespace |
+ |
+void TargetX8632::findRMW() { |
+ OstreamLocker L(Func->getContext()); |
+ Ostream &Str = Func->getContext()->getStrDump(); |
+ for (CfgNode *Node : Func->getNodes()) { |
+ // Walk through the instructions, considering each sequence of 3 |
+ // instructions, and look for the particular RMW pattern. Note that this |
+ // search can be "broken" (false negatives) if there are intervening deleted |
+ // instructions, or intervening instructions that could be safely moved out |
+ // of the way to reveal an RMW pattern. |
+ auto E = Node->getInsts().end(); |
+ auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); |
+ for (; I3 != E; I1 = I2, I2 = I3, ++I3) { |
+ if (I1 == E || I2 == E) |
+ continue; |
+ if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted()) |
+ continue; |
+ if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { |
+ if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { |
+ if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { |
+ // Look for: |
+ // a = Load addr |
+ // b = <op> a, other |
+ // Store b, addr |
+ // Change to: |
+ // a = Load addr |
+ // b = <op> a, other |
+ // x = FakeDef |
+ // RMW <op>, addr, other, x |
+ // b = Store b, addr, x |
jvoung (off chromium)
2015/06/16 17:59:21
Is this redefinition of b from " b = Store b, addr
Jim Stichnoth
2015/06/17 00:15:40
Yes, clarified in the comment.
|
+ // |
+ // With this transformation, the Store instruction acquires a Dest |
+ // variable and is now subject to dead code elimination if there are |
+ // no more uses of "b". Variable "x" is a beacon for determining |
+ // whether the Store instruction gets dead-code eliminated. If the |
+ // Store instruction is eliminated, then it must be the case that |
+ // the RMW instruction ends x's live range, and therefore the RMW |
+ // instruction will be retained and later lowered. On the other |
+ // hand, if the RMW instruction does not end x's live range, then |
+ // the Store instruction must still be present, and therefore the |
+ // RMW instruction is ignored during lowering because it is |
+ // redundant with the Store instruction. |
+ // |
+ // Note that if "a" has further uses, the RMW transformation may |
+ // still trigger, resulting in two loads and one store, which is |
+ // worse than the original one load and one store. However, this is |
+ // probably rare, and caching probably keeps it just as fast. |
+ if (Load->getSourceAddress() != Store->getAddr()) |
+ continue; |
+ if (Arith->getSrc(0) != Load->getDest()) |
+ continue; |
+ if (Arith->getDest() != Store->getData()) |
+ continue; |
+ if (!canRMW(Arith)) |
+ continue; |
+ if (Func->isVerbose(IceV_RMW)) { |
+ Str << "Found RMW in " << Func->getFunctionName() << ":\n "; |
+ Load->dump(Func); |
+ Str << "\n "; |
+ Arith->dump(Func); |
+ Str << "\n "; |
+ Store->dump(Func); |
+ Str << "\n"; |
+ } |
+ Variable *Beacon = Func->makeVariable(IceType_i32); |
+ Beacon->setWeight(0); |
+ Store->setRmwBeacon(Beacon); |
+ InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); |
+ Node->getInsts().insert(I3, BeaconDef); |
+ InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( |
+ Func, Arith->getSrc(1), Store->getAddr(), Beacon, |
+ Arith->getOp()); |
+ Node->getInsts().insert(I3, RMW); |
jvoung (off chromium)
2015/06/16 17:59:21
Just checking: should this be insert(I3, RMW) or i
Jim Stichnoth
2015/06/17 00:15:40
This is as intended. First we insert the BeaconDe
|
+ } |
+ } |
+ } |
+ } |
+ } |
+} |
+ |
+namespace { |
+ |
// Converts a ConstantInteger32 operand into its constant value, or |
// MemoryOrderInvalid if the operand is not a ConstantInteger32. |
uint64_t getConstantMemoryOrder(Operand *Opnd) { |
@@ -4395,7 +4511,10 @@ void TargetX8632::doAddressOptStore() { |
Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
Shift, SegmentReg); |
- Context.insert(InstStore::create(Func, Data, Addr)); |
+ InstStore *NewStore = InstStore::create(Func, Data, Addr); |
+ if (Inst->getDest()) |
+ NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
+ Context.insert(NewStore); |
} |
} |
@@ -4498,6 +4617,46 @@ void TargetX8632::eliminateNextVectorSextInstruction( |
void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } |
+void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { |
+ // If the beacon variable's live range does not end in this |
+ // instruction, then it must end in the modified Store instruction |
+ // that follows. This means that the original Store instruction is |
+ // still there, either because the value being stored is used beyond |
+ // the Store instruction, or because dead code elimination did not |
+ // happen. In either case, we cancel RMW lowering (and the caller |
+ // deletes the RMW instruction). |
+ if (!RMW->isLastUse(RMW->getBeacon())) |
+ return; |
+ Operand *Src = RMW->getData(); |
+ Type Ty = Src->getType(); |
+ Operand *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
+ if (Ty == IceType_i64) { |
+ // TODO(stichnot): Implement. |
+ } else if (isVectorType(Ty)) { |
+ // TODO(stichnot): Implement. |
+ } else { |
+ // i8, i16, i32, f32, f64 |
+ switch (RMW->getOp()) { |
+ default: |
+ // TODO(stichnot): Implement other arithmetic operators. |
+ break; |
+ case InstArithmetic::Add: |
+ Src = legalize(Src, Legal_Reg | Legal_Imm); |
+ _add_rmw(Addr, Src); |
+ return; |
+ } |
+ } |
+ llvm::report_fatal_error("Couldn't lower RMW instruction"); |
+} |
+ |
+void TargetX8632::lowerOther(const Inst *Instr) { |
+ if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
+ lowerRMW(RMW); |
+ } else { |
+ TargetLowering::lowerOther(Instr); |
+ } |
+} |
+ |
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
// preserve integrity of liveness analysis. Undef values are also |
// turned into zeroes, since loOperand() and hiOperand() don't expect |