Chromium Code Reviews| Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index b686579527410ac67ff980e967aa5e0e22f3b37e..f0981f9c05e761a96b60065890e9867c177dc0ea 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -461,6 +461,8 @@ void TargetX8632::translateO2() { |
| Func->dump("After Phi lowering"); |
| } |
| + findRMW(); |
| + |
| // Address mode optimization. |
| Func->getVMetadata()->init(VMK_SingleDefs); |
| Func->doAddressOpt(); |
| @@ -579,6 +581,120 @@ void TargetX8632::translateOm1() { |
| namespace { |
| +bool canRMW(const InstArithmetic *Arith) { |
| + Type Ty = Arith->getDest()->getType(); |
| + bool isI64 = Ty == IceType_i64; |
| + bool isVector = isVectorType(Ty); |
| + |
| + switch (Arith->getOp()) { |
| + default: |
| + return false; |
| + case InstArithmetic::Add: |
| + return !isI64 && !isVector; // TODO(stichnot): implement |
|
jvoung (off chromium)
2015/06/16 17:59:22
not TODO for Add anymore?
Jim Stichnoth
2015/06/17 00:15:40
I meant implement the i64 and vector variants. Cl
|
| + case InstArithmetic::Sub: |
| + case InstArithmetic::And: |
| + case InstArithmetic::Or: |
| + case InstArithmetic::Xor: |
| + case InstArithmetic::Fadd: |
| + case InstArithmetic::Fsub: |
| + case InstArithmetic::Fmul: |
| + case InstArithmetic::Fdiv: |
| + return false; // TODO(stichnot): implement |
| + return true; |
| + case InstArithmetic::Shl: |
| + case InstArithmetic::Lshr: |
| + case InstArithmetic::Ashr: |
| + return false; // TODO(stichnot): implement |
| + return !isI64 && !isVector; |
| + } |
| + // Not handled for lack of simple lowering: |
| + // shift on i64 and vectors |
|
jvoung (off chromium)
2015/06/16 17:59:22
Maybe put this earlier, to explain the "return !is
Jim Stichnoth
2015/06/17 00:15:40
Done.
|
| + // mul, udiv, urem, sdiv, srem, frem |
| +} |
| + |
| +} // end of anonymous namespace |
| + |
| +void TargetX8632::findRMW() { |
| + OstreamLocker L(Func->getContext()); |
| + Ostream &Str = Func->getContext()->getStrDump(); |
| + for (CfgNode *Node : Func->getNodes()) { |
| + // Walk through the instructions, considering each sequence of 3 |
| + // instructions, and look for the particular RMW pattern. Note that this |
| + // search can be "broken" (false negatives) if there are intervening deleted |
| + // instructions, or intervening instructions that could be safely moved out |
| + // of the way to reveal an RMW pattern. |
| + auto E = Node->getInsts().end(); |
| + auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); |
| + for (; I3 != E; I1 = I2, I2 = I3, ++I3) { |
| + if (I1 == E || I2 == E) |
| + continue; |
| + if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted()) |
| + continue; |
| + if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { |
| + if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { |
| + if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { |
| + // Look for: |
| + // a = Load addr |
| + // b = <op> a, other |
| + // Store b, addr |
| + // Change to: |
| + // a = Load addr |
| + // b = <op> a, other |
| + // x = FakeDef |
| + // RMW <op>, addr, other, x |
| + // b = Store b, addr, x |
|
jvoung (off chromium)
2015/06/16 17:59:21
Is this redefinition of b from " b = Store b, addr
Jim Stichnoth
2015/06/17 00:15:40
Yes, clarified in the comment.
|
| + // |
| + // With this transformation, the Store instruction acquires a Dest |
| + // variable and is now subject to dead code elimination if there are |
| + // no more uses of "b". Variable "x" is a beacon for determining |
| + // whether the Store instruction gets dead-code eliminated. If the |
| + // Store instruction is eliminated, then it must be the case that |
| + // the RMW instruction ends x's live range, and therefore the RMW |
| + // instruction will be retained and later lowered. On the other |
| + // hand, if the RMW instruction does not end x's live range, then |
| + // the Store instruction must still be present, and therefore the |
| + // RMW instruction is ignored during lowering because it is |
| + // redundant with the Store instruction. |
| + // |
| + // Note that if "a" has further uses, the RMW transformation may |
| + // still trigger, resulting in two loads and one store, which is |
| + // worse than the original one load and one store. However, this is |
| + // probably rare, and caching probably keeps it just as fast. |
| + if (Load->getSourceAddress() != Store->getAddr()) |
| + continue; |
| + if (Arith->getSrc(0) != Load->getDest()) |
| + continue; |
| + if (Arith->getDest() != Store->getData()) |
| + continue; |
| + if (!canRMW(Arith)) |
| + continue; |
| + if (Func->isVerbose(IceV_RMW)) { |
| + Str << "Found RMW in " << Func->getFunctionName() << ":\n "; |
| + Load->dump(Func); |
| + Str << "\n "; |
| + Arith->dump(Func); |
| + Str << "\n "; |
| + Store->dump(Func); |
| + Str << "\n"; |
| + } |
| + Variable *Beacon = Func->makeVariable(IceType_i32); |
| + Beacon->setWeight(0); |
| + Store->setRmwBeacon(Beacon); |
| + InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); |
| + Node->getInsts().insert(I3, BeaconDef); |
| + InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( |
| + Func, Arith->getSrc(1), Store->getAddr(), Beacon, |
| + Arith->getOp()); |
| + Node->getInsts().insert(I3, RMW); |
|
jvoung (off chromium)
2015/06/16 17:59:21
Just checking: should this be insert(I3, RMW) or i
Jim Stichnoth
2015/06/17 00:15:40
This is as intended. First we insert the BeaconDe
|
| + } |
| + } |
| + } |
| + } |
| + } |
| +} |
| + |
| +namespace { |
| + |
| // Converts a ConstantInteger32 operand into its constant value, or |
| // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| @@ -4395,7 +4511,10 @@ void TargetX8632::doAddressOptStore() { |
| Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| Shift, SegmentReg); |
| - Context.insert(InstStore::create(Func, Data, Addr)); |
| + InstStore *NewStore = InstStore::create(Func, Data, Addr); |
| + if (Inst->getDest()) |
| + NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| + Context.insert(NewStore); |
| } |
| } |
| @@ -4498,6 +4617,46 @@ void TargetX8632::eliminateNextVectorSextInstruction( |
| void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } |
| +void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { |
| + // If the beacon variable's live range does not end in this |
| + // instruction, then it must end in the modified Store instruction |
| + // that follows. This means that the original Store instruction is |
| + // still there, either because the value being stored is used beyond |
| + // the Store instruction, or because dead code elimination did not |
| + // happen. In either case, we cancel RMW lowering (and the caller |
| + // deletes the RMW instruction). |
| + if (!RMW->isLastUse(RMW->getBeacon())) |
| + return; |
| + Operand *Src = RMW->getData(); |
| + Type Ty = Src->getType(); |
| + Operand *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
| + if (Ty == IceType_i64) { |
| + // TODO(stichnot): Implement. |
| + } else if (isVectorType(Ty)) { |
| + // TODO(stichnot): Implement. |
| + } else { |
| + // i8, i16, i32, f32, f64 |
| + switch (RMW->getOp()) { |
| + default: |
| + // TODO(stichnot): Implement other arithmetic operators. |
| + break; |
| + case InstArithmetic::Add: |
| + Src = legalize(Src, Legal_Reg | Legal_Imm); |
| + _add_rmw(Addr, Src); |
| + return; |
| + } |
| + } |
| + llvm::report_fatal_error("Couldn't lower RMW instruction"); |
| +} |
| + |
| +void TargetX8632::lowerOther(const Inst *Instr) { |
| + if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| + lowerRMW(RMW); |
| + } else { |
| + TargetLowering::lowerOther(Instr); |
| + } |
| +} |
| + |
| // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| // preserve integrity of liveness analysis. Undef values are also |
| // turned into zeroes, since loOperand() and hiOperand() don't expect |