Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 1169493002: Subzero: Improve/refactor folding loads into the next instruction. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index bdae39451bf3e727e8d775d93c5f815cf42bb96a..45d68921d0c36cf421538f6d3b16deb2c05b9cf9 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -482,6 +482,7 @@ void TargetX8632::translateO2() {
return;
Func->dump("After x86 address mode opt");
+ doLoadOpt();
Func->genCode();
if (Func->hasError())
return;
@@ -572,6 +573,126 @@ void TargetX8632::translateOm1() {
}
}
+namespace {
+
+// Converts a ConstantInteger32 operand into its constant value, or
+// MemoryOrderInvalid if the operand is not a ConstantInteger32.
+uint64_t getConstantMemoryOrder(Operand *Opnd) {
+ if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
+ return Integer->getValue();
+ return Intrinsics::MemoryOrderInvalid;
+}
+
+// Determines whether the dest of a Load instruction can be folded
+// into one of two candidate src operands of the next instruction
+// (both srcs of a binary instruction, or the true/false operands of
+// a select). This is true as long as the load dest matches exactly
+// one of the two operands. Replaces Src0 or Src1 with LoadSrc if so.
+bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
+ Operand *&Src0, Operand *&Src1) {
+ if (Src0 == LoadDest && Src1 != LoadDest) {
+ Src0 = LoadSrc;
+ return true;
+ }
+ if (Src0 != LoadDest && Src1 == LoadDest) {
+ Src1 = LoadSrc;
+ return true;
+ }
+ return false;
+}
+
+} // end of anonymous namespace
+
+void TargetX8632::doLoadOpt() {
+ for (CfgNode *Node : Func->getNodes()) {
+ Context.init(Node);
+ while (!Context.atEnd()) {
+ Variable *LoadDest = nullptr;
+ Operand *LoadSrc = nullptr;
+ Inst *CurInst = Context.getCur();
+ Inst *Next = Context.getNextInst();
+ // Determine whether the current instruction is a Load
+ // instruction or equivalent.
+ if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
+ // An InstLoad always qualifies.
+ LoadDest = Load->getDest();
+ const bool DoLegalize = false;
+ LoadSrc = formMemoryOperand(Load->getSourceAddress(),
+ LoadDest->getType(), DoLegalize);
+ } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
+ // An AtomicLoad intrinsic qualifies as long as it has a valid
+ // memory ordering, and can be implemented in a single
+ // instruction (i.e., not i64).
+ Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
+ if (ID == Intrinsics::AtomicLoad &&
+ Intrin->getDest()->getType() != IceType_i64 &&
+ Intrinsics::isMemoryOrderValid(
+ ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
+ LoadDest = Intrin->getDest();
+ const bool DoLegalize = false;
+ LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
+ DoLegalize);
+ }
+ }
+ // A Load instruction can be folded into the following
+ // instruction only if the following instruction ends the Load's
+ // Dest variable's live range.
+ if (LoadDest && Next && Next->isLastUse(LoadDest)) {
+ assert(LoadSrc);
+ Inst *NewInst = nullptr;
+ if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
+ Operand *Src0 = Arith->getSrc(0);
+ Operand *Src1 = Arith->getSrc(1);
+ if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+ NewInst = InstArithmetic::create(Func, Arith->getOp(),
+ Arith->getDest(), Src0, Src1);
+ }
+ } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
+ Operand *Src0 = Icmp->getSrc(0);
+ Operand *Src1 = Icmp->getSrc(1);
+ if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+ NewInst = InstIcmp::create(Func, Icmp->getCondition(),
+ Icmp->getDest(), Src0, Src1);
+ }
+ } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
+ Operand *Src0 = Fcmp->getSrc(0);
+ Operand *Src1 = Fcmp->getSrc(1);
+ if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+ NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
+ Fcmp->getDest(), Src0, Src1);
+ }
+ } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
+ Operand *Src0 = Select->getTrueOperand();
+ Operand *Src1 = Select->getFalseOperand();
+ if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
+ NewInst = InstSelect::create(Func, Select->getDest(),
+ Select->getCondition(), Src0, Src1);
+ }
+ } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
+ // The load dest can always be folded into a Cast
+ // instruction.
+ Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
+ if (Src0 == LoadDest) {
+ NewInst = InstCast::create(Func, Cast->getCastKind(),
+ Cast->getDest(), LoadSrc);
+ }
+ }
+ if (NewInst) {
+ CurInst->setDeleted();
+ Next->setDeleted();
+ Context.insert(NewInst);
+ // Update NewInst->LiveRangesEnded so that target lowering
+ // may benefit. Also update NewInst->HasSideEffects.
+ NewInst->spliceLivenessInfo(Next, CurInst);
+ }
+ }
+ Context.advanceCur();
+ Context.advanceNext();
+ }
+ }
+ Func->dump("After load optimization");
+}
+
bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
return Br->optimizeBranch(NextNode);
@@ -804,15 +925,15 @@ void TargetX8632::addProlog(CfgNode *Node) {
// that stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
- if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
- assert(Var->getWeight().isZero());
- if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
- VariablesLinkedToSpillSlots.push_back(Var);
- return true;
- }
- }
- return false;
- };
+ if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
+ assert(Var->getWeight().isZero());
+ if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
+ VariablesLinkedToSpillSlots.push_back(Var);
+ return true;
+ }
+ }
+ return false;
+ };
// Compute the list of spilled variables and bounds for GlobalsSize, etc.
getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
@@ -1170,6 +1291,10 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
Operand *Src0 = legalize(Inst->getSrc(0));
Operand *Src1 = legalize(Inst->getSrc(1));
+ if (Inst->isCommutative()) {
+ if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
+ std::swap(Src0, Src1);
+ }
if (Dest->getType() == IceType_i64) {
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
@@ -2891,18 +3016,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
}
}
-namespace {
-
-// Converts a ConstantInteger32 operand into its constant value, or
-// MemoryOrderInvalid if the operand is not a ConstantInteger32.
-uint64_t getConstantMemoryOrder(Operand *Opnd) {
- if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
- return Integer->getValue();
- return Intrinsics::MemoryOrderInvalid;
-}
-
-} // end of anonymous namespace
-
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
case Intrinsics::AtomicCmpxchg: {
@@ -3006,10 +3119,11 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Func->setError("Unexpected memory ordering for AtomicRMW");
return;
}
- lowerAtomicRMW(Instr->getDest(),
- static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
- Instr->getArg(0))->getValue()),
- Instr->getArg(1), Instr->getArg(2));
+ lowerAtomicRMW(
+ Instr->getDest(),
+ static_cast<uint32_t>(
+ llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
+ Instr->getArg(1), Instr->getArg(2));
return;
case Intrinsics::AtomicStore: {
if (!Intrinsics::isMemoryOrderValid(
@@ -3852,66 +3966,9 @@ void TargetX8632::lowerLoad(const InstLoad *Load) {
// OperandX8632Mem operand. Note that the address mode
// optimization already creates an OperandX8632Mem operand, so it
// doesn't need another level of transformation.
- Type Ty = Load->getDest()->getType();
- Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
-
- // Fuse this load with a subsequent Arithmetic instruction in the
- // following situations:
- // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
- // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
- //
- // Fuse this load with a subsequent Cast instruction:
- // a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a
- //
- // TODO: Clean up and test thoroughly.
- // (E.g., if there is an mfence-all make sure the load ends up on the
- // same side of the fence).
- //
- // TODO: Why limit to Arithmetic instructions? This could probably be
- // applied to most any instruction type. Look at all source operands
- // in the following instruction, and if there is one instance of the
- // load instruction's dest variable, and that instruction ends that
- // variable's live range, then make the substitution. Deal with
- // commutativity optimization in the arithmetic instruction lowering.
- //
- // TODO(stichnot): Do load fusing as a separate pass. Run it before
- // the bool folding pass. Modify Ice::Inst to allow src operands to
- // be replaced, including updating Inst::LiveRangesEnded, to avoid
- // having to manually mostly clone each instruction type.
- Inst *NextInst = Context.getNextInst();
Variable *DestLoad = Load->getDest();
- if (NextInst && NextInst->isLastUse(DestLoad)) {
- if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) {
- InstArithmetic *NewArith = nullptr;
- Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
- Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
- if (Src1Arith == DestLoad && DestLoad != Src0Arith) {
- NewArith = InstArithmetic::create(
- Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0);
- } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
- DestLoad != Src1Arith) {
- NewArith = InstArithmetic::create(
- Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0);
- }
- if (NewArith) {
- Arith->setDeleted();
- Context.advanceNext();
- lowerArithmetic(NewArith);
- return;
- }
- } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) {
- Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0));
- if (Src0Cast == DestLoad) {
- InstCast *NewCast =
- InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0);
- Cast->setDeleted();
- Context.advanceNext();
- lowerCast(NewCast);
- return;
- }
- }
- }
-
+ Type Ty = DestLoad->getType();
+ Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
lowerAssign(Assign);
}
@@ -4639,7 +4696,8 @@ Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}
-OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) {
+OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty,
+ bool DoLegalize) {
OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
// It may be the case that address mode optimization already creates
// an OperandX8632Mem, so in that case it wouldn't need another level
@@ -4656,7 +4714,7 @@ OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) {
}
Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
}
- return llvm::cast<OperandX8632Mem>(legalize(Mem));
+ return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem);
}
Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698