Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index bdae39451bf3e727e8d775d93c5f815cf42bb96a..45d68921d0c36cf421538f6d3b16deb2c05b9cf9 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -482,6 +482,7 @@ void TargetX8632::translateO2() {
     return;
   Func->dump("After x86 address mode opt");
 
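+  // Fold load instructions into their uses before target lowering.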
+  doLoadOpt();
   Func->genCode();
   if (Func->hasError())
     return;
@@ -572,6 +573,126 @@ void TargetX8632::translateOm1() {
   }
 }
 
+namespace {
+
+// Returns the constant value of a ConstantInteger32 operand, or
+// MemoryOrderInvalid if the operand is not a ConstantInteger32.
+uint64_t getConstantMemoryOrder(Operand *Opnd) {
+  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
+    return Integer->getValue();
+  return Intrinsics::MemoryOrderInvalid;
+}
+
+// Determines whether the dest of a Load instruction can be folded
+// into one of the src operands of a 2-operand instruction. This is
+// true as long as the load dest matches exactly one of the binary
+// instruction's src operands. On success, replaces that src operand
+// (Src0 or Src1) with LoadSrc and returns true.
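+// For example, given a=[mem] followed by c=b+a, where the add ends
+// the live range of a, the pair can be folded into c=b+[mem]. Given
+// c=a+a instead, nothing is replaced, because the load dest matches
+// both src operands.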
+bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
+                               Operand *&Src0, Operand *&Src1) {
+  if (Src0 == LoadDest && Src1 != LoadDest) {
+    Src0 = LoadSrc;
+    return true;
+  }
+  if (Src0 != LoadDest && Src1 == LoadDest) {
+    Src1 = LoadSrc;
+    return true;
+  }
+  return false;
+}
+
+} // end of anonymous namespace
+
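+// doLoadOpt() folds a Load instruction (or an equivalent AtomicLoad
+// intrinsic) into the next instruction, whenever the next instruction
+// ends the live range of the load's dest variable:
+//   a=[mem]; c=b+a ==> c=b+[mem] if last use of a
+//   a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a
+// Arithmetic, Icmp, Fcmp, Select, and Cast instructions qualify.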
+void TargetX8632::doLoadOpt() {
+  for (CfgNode *Node : Func->getNodes()) {
+    Context.init(Node);
+    while (!Context.atEnd()) {
+      Variable *LoadDest = nullptr;
+      Operand *LoadSrc = nullptr;
+      Inst *CurInst = Context.getCur();
+      Inst *Next = Context.getNextInst();
+      // Determine whether the current instruction is a Load
+      // instruction or equivalent.
+      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
+        // An InstLoad always qualifies.
+        LoadDest = Load->getDest();
+        const bool DoLegalize = false;
+        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
+                                    LoadDest->getType(), DoLegalize);
+      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
+        // An AtomicLoad intrinsic qualifies as long as it has a valid
+        // memory ordering, and can be implemented in a single
+        // instruction (i.e., not i64).
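+        // For AtomicLoad, Arg(0) is the pointer and Arg(1) is the
+        // memory ordering.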
+        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
+        if (ID == Intrinsics::AtomicLoad &&
+            Intrin->getDest()->getType() != IceType_i64 &&
+            Intrinsics::isMemoryOrderValid(
+                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
+          LoadDest = Intrin->getDest();
+          const bool DoLegalize = false;
+          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
+                                      DoLegalize);
+        }
+      }
+      // A Load instruction can be folded into the following
+      // instruction only if the following instruction ends the Load's
+      // Dest variable's live range.
+      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
+        assert(LoadSrc);
+        Inst *NewInst = nullptr;
+        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
+          Operand *Src0 = Arith->getSrc(0);
+          Operand *Src1 = Arith->getSrc(1);
+          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+            NewInst = InstArithmetic::create(Func, Arith->getOp(),
+                                             Arith->getDest(), Src0, Src1);
+          }
+        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
+          Operand *Src0 = Icmp->getSrc(0);
+          Operand *Src1 = Icmp->getSrc(1);
+          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
+                                       Icmp->getDest(), Src0, Src1);
+          }
+        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
+          Operand *Src0 = Fcmp->getSrc(0);
+          Operand *Src1 = Fcmp->getSrc(1);
+          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
+                                       Fcmp->getDest(), Src0, Src1);
+          }
+        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
+          Operand *Src0 = Select->getTrueOperand();
+          Operand *Src1 = Select->getFalseOperand();
+          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+            NewInst = InstSelect::create(Func, Select->getDest(),
+                                         Select->getCondition(), Src0, Src1);
+          }
+        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
+          // A Cast instruction has only one source operand, so the
+          // load dest can be folded whenever it is that operand.
+          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
+          if (Src0 == LoadDest) {
+            NewInst = InstCast::create(Func, Cast->getCastKind(),
+                                       Cast->getDest(), LoadSrc);
+          }
+        }
+        if (NewInst) {
+          CurInst->setDeleted();
+          Next->setDeleted();
+          Context.insert(NewInst);
+          // Update NewInst->LiveRangesEnded so that target lowering
+          // may benefit. Also update NewInst->HasSideEffects.
+          NewInst->spliceLivenessInfo(Next, CurInst);
+        }
+      }
+      Context.advanceCur();
+      Context.advanceNext();
+    }
+  }
+  Func->dump("After load optimization");
+}
+
 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
   if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
     return Br->optimizeBranch(NextNode);
@@ -804,15 +925,15 @@ void TargetX8632::addProlog(CfgNode *Node) {
   // that stack slot.
   std::function<bool(Variable *)> TargetVarHook =
       [&VariablesLinkedToSpillSlots](Variable *Var) {
-        if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
-          assert(Var->getWeight().isZero());
-          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
-            VariablesLinkedToSpillSlots.push_back(Var);
-            return true;
-          }
-        }
-        return false;
-      };
+    if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
+      assert(Var->getWeight().isZero());
+      if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
+        VariablesLinkedToSpillSlots.push_back(Var);
+        return true;
+      }
+    }
+    return false;
+  };
 
   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
@@ -1170,6 +1291,10 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
   Variable *Dest = Inst->getDest();
   Operand *Src0 = legalize(Inst->getSrc(0));
   Operand *Src1 = legalize(Inst->getSrc(1));
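+  // For a commutative operation, putting a non-Variable operand (e.g. a
+  // memory operand produced by load folding, or a constant) in the Src1
+  // slot lets it be used directly as the second source of the
+  // two-operand x86 instruction.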
+  if (Inst->isCommutative()) {
+    if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
+      std::swap(Src0, Src1);
+  }
   if (Dest->getType() == IceType_i64) {
     Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
     Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
@@ -2891,18 +3016,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
   }
 }
 
-namespace {
-
-// Converts a ConstantInteger32 operand into its constant value, or
-// MemoryOrderInvalid if the operand is not a ConstantInteger32.
-uint64_t getConstantMemoryOrder(Operand *Opnd) {
-  if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
-    return Integer->getValue();
-  return Intrinsics::MemoryOrderInvalid;
-}
-
-} // end of anonymous namespace
-
 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
   switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
   case Intrinsics::AtomicCmpxchg: {
@@ -3006,10 +3119,11 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
       Func->setError("Unexpected memory ordering for AtomicRMW");
       return;
     }
-    lowerAtomicRMW(Instr->getDest(),
-                   static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
-                       Instr->getArg(0))->getValue()),
-                   Instr->getArg(1), Instr->getArg(2));
+    lowerAtomicRMW(
+        Instr->getDest(),
+        static_cast<uint32_t>(
+            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
+        Instr->getArg(1), Instr->getArg(2));
     return;
   case Intrinsics::AtomicStore: {
     if (!Intrinsics::isMemoryOrderValid(
@@ -3852,66 +3966,9 @@ void TargetX8632::lowerLoad(const InstLoad *Load) {
   // OperandX8632Mem operand. Note that the address mode
   // optimization already creates an OperandX8632Mem operand, so it
   // doesn't need another level of transformation.
-  Type Ty = Load->getDest()->getType();
-  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
-
-  // Fuse this load with a subsequent Arithmetic instruction in the
-  // following situations:
-  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
-  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
-  //
-  // Fuse this load with a subsequent Cast instruction:
-  //   a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a
-  //
-  // TODO: Clean up and test thoroughly.
-  // (E.g., if there is an mfence-all make sure the load ends up on the
-  // same side of the fence).
-  //
-  // TODO: Why limit to Arithmetic instructions? This could probably be
-  // applied to most any instruction type. Look at all source operands
-  // in the following instruction, and if there is one instance of the
-  // load instruction's dest variable, and that instruction ends that
-  // variable's live range, then make the substitution. Deal with
-  // commutativity optimization in the arithmetic instruction lowering.
-  //
-  // TODO(stichnot): Do load fusing as a separate pass. Run it before
-  // the bool folding pass. Modify Ice::Inst to allow src operands to
-  // be replaced, including updating Inst::LiveRangesEnded, to avoid
-  // having to manually mostly clone each instruction type.
-  Inst *NextInst = Context.getNextInst();
   Variable *DestLoad = Load->getDest();
-  if (NextInst && NextInst->isLastUse(DestLoad)) {
-    if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) {
-      InstArithmetic *NewArith = nullptr;
-      Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
-      Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
-      if (Src1Arith == DestLoad && DestLoad != Src0Arith) {
-        NewArith = InstArithmetic::create(
-            Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0);
-      } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
-                 DestLoad != Src1Arith) {
-        NewArith = InstArithmetic::create(
-            Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0);
-      }
-      if (NewArith) {
-        Arith->setDeleted();
-        Context.advanceNext();
-        lowerArithmetic(NewArith);
-        return;
-      }
-    } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) {
-      Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0));
-      if (Src0Cast == DestLoad) {
-        InstCast *NewCast =
-            InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0);
-        Cast->setDeleted();
-        Context.advanceNext();
-        lowerCast(NewCast);
-        return;
-      }
-    }
-  }
-
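+  // Fusing a load with a subsequent instruction is handled by the
+  // doLoadOpt() pass, which runs before lowering.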
+  Type Ty = DestLoad->getType();
+  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
   InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
   lowerAssign(Assign);
 }
@@ -4639,7 +4696,8 @@ Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
   return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
 }
 
-OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) {
+OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty,
+                                                bool DoLegalize) {
   OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
   // It may be the case that address mode optimization already creates
   // an OperandX8632Mem, so in that case it wouldn't need another level
@@ -4656,7 +4714,7 @@ OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) {
     }
     Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
   }
-  return llvm::cast<OperandX8632Mem>(legalize(Mem));
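+  // When DoLegalize is false (as in doLoadOpt()), the memory operand is
+  // returned unlegalized; it is legalized later, when the folded
+  // instruction is itself lowered.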
+  return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem);
 }
 
 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {