Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 8739c77ac8e99351c19df3736faed4789cbc5be1..0e4482ac89d4a4b335ca03e7e8b671447d5e542d 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2851,15 +2851,19 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
break;
case InstIcmp::Eq:
case InstIcmp::Ule:
- _mov(Temp, Src0LoRM);
- _or(Temp, Src0HiRM);
+ // Mov Src0HiRM first, because it was legalized most recently,
Jim Stichnoth
2015/10/27 05:52:34
Can you reformat this and other comments to 80-col
sehr
2015/10/27 21:48:01
Done.

+ // and will sometimes avoid a move before the OR.
+ _mov(Temp, Src0HiRM);
+ _or(Temp, Src0LoRM);
Context.insert(InstFakeUse::create(Func, Temp));
setccOrBr(Traits::Cond::Br_e, Dest, Br);
return;
case InstIcmp::Ne:
case InstIcmp::Ugt:
- _mov(Temp, Src0LoRM);
- _or(Temp, Src0HiRM);
+ // Mov Src0HiRM first, because it was legalized most recently,
+ // and will sometimes avoid a move before the OR.
+ _mov(Temp, Src0HiRM);
+ _or(Temp, Src0LoRM);
Context.insert(InstFakeUse::create(Func, Temp));
setccOrBr(Traits::Cond::Br_ne, Dest, Br);
return;
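
Both rewritten cases fold a 64-bit comparison against zero into a single OR of
the 32-bit halves; a standalone sketch of the equivalence they rely on
(illustrative only, hypothetical helper name, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // A 64-bit value is zero exactly when the OR of its halves is zero, so one
  // OR sets the flags needed for both the Eq/Ule and Ne/Ugt cases. The operand
  // order does not affect correctness; it only affects whether the register
  // allocator can coalesce Temp with the most recently legalized half and
  // drop the initial mov.
  static bool isZero64(uint32_t Lo, uint32_t Hi) { return (Lo | Hi) == 0; }

  int main() {
    assert(isZero64(0, 0));
    assert(!isZero64(1, 0) && !isZero64(0, 1));
    return 0;
  }
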
@@ -4101,11 +4105,15 @@ inline bool isAdd(const Inst *Inst) {
return false;
}
-inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
+inline void dumpAddressOpt(const Cfg *Func,
+ const ConstantRelocatable *Relocatable,
+ int32_t Offset, const Variable *Base,
const Variable *Index, uint16_t Shift,
- int32_t Offset, const Inst *Reason) {
+ const Inst *Reason) {
+ /*
Jim Stichnoth
2015/10/27 05:52:34
Why is this commented out?
sehr
2015/10/27 21:48:01
Done.

if (!BuildDefs::dump())
return;
+ */
if (!Func->isVerbose(IceV_AddrOpt))
return;
OstreamLocker L(Func->getContext());
@@ -4122,11 +4130,13 @@ inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
Index->dump(Func);
else
Str << "<null>";
- Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
+ Str << ", Shift=" << Shift << ", Offset=" << Offset
+ << ", Relocatable=" << Relocatable << "\n";
}
-inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
- Variable *&Var, const Inst *&Reason) {
+inline bool matchAssign(const VariablesMetadata *VMetadata, Variable *&Var,
+ ConstantRelocatable *&Relocatable, int32_t &Offset,
+ const Inst *&Reason) {
// Var originates from Var=SrcVar ==> set Var:=SrcVar
if (Var == nullptr)
return false;
@@ -4143,6 +4153,21 @@ inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
Reason = VarAssign;
return true;
}
+ } else if (auto Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.

+ int32_t MoreOffset = Const->getValue();
+ if (Utils::WouldOverflowAdd(Offset, MoreOffset))
Jim Stichnoth
2015/10/27 05:52:34
Is overflow an issue for x86-32? Can it be disallowed?
sehr
2015/10/27 21:48:01
X86-64 immediates are also 32 bits.

+ return false;
+ Var = nullptr;
+ Offset += MoreOffset;
+ Reason = VarAssign;
+ return true;
+ } else if (auto AddReloc = llvm::dyn_cast<ConstantRelocatable>(SrcOp)) {
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.

+ if (Relocatable == nullptr) {
+ Var = nullptr;
+ Relocatable = AddReloc;
+ Reason = VarAssign;
+ return true;
+ }
}
}
}
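
The two new arms fold "Var = <32-bit constant>" into the running Offset and
"Var = <symbol>" into the single Relocatable slot, clearing Var in both cases.
A simplified, hypothetical model of that bookkeeping (the names AddrState,
foldConstAssign, foldRelocAssign, and wouldOverflowAdd are made up, not the
Subzero API):

  #include <cstdint>
  #include <limits>
  #include <string>

  // Hypothetical stand-ins: Symbol plays the role of ConstantRelocatable, and
  // wouldOverflowAdd mirrors the Utils::WouldOverflowAdd guard above.
  struct AddrState {
    std::string Symbol;
    int32_t Offset = 0;
  };

  static bool wouldOverflowAdd(int32_t A, int32_t B) {
    return (B > 0 && A > std::numeric_limits<int32_t>::max() - B) ||
           (B < 0 && A < std::numeric_limits<int32_t>::min() - B);
  }

  // "Var = C": the variable disappears and C is absorbed into the 32-bit
  // displacement, unless the addition would overflow.
  static bool foldConstAssign(AddrState &S, int32_t C) {
    if (wouldOverflowAdd(S.Offset, C))
      return false;
    S.Offset += C;
    return true;
  }

  // "Var = @sym": the variable disappears and the symbol is absorbed; an x86
  // address can carry at most one relocatable symbol.
  static bool foldRelocAssign(AddrState &S, const std::string &Sym) {
    if (!S.Symbol.empty())
      return false;
    S.Symbol = Sym;
    return true;
  }

  int main() {
    AddrState S;
    bool Ok = foldConstAssign(S, 8) && foldRelocAssign(S, "@g");
    return (Ok && S.Offset == 8 && !foldRelocAssign(S, "@h")) ? 0 : 1;
  }
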
@@ -4202,8 +4227,12 @@ inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:01
Done.

if (ConstantInteger32 *Const =
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:01
Done.

llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
- if (ArithInst->getOp() == InstArithmetic::Mul &&
- !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
+ if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
+ return false;
+ switch (ArithInst->getOp()) {
+ default:
+ return false;
+ case InstArithmetic::Mul: {
uint64_t Mult = Const->getValue();
Jim Stichnoth
2015/10/27 13:57:10
Maybe this ought to be uint32_t given the dyn_cast
sehr
2015/10/27 21:48:00
Done.

uint32_t LogMult;
switch (Mult) {
@@ -4229,6 +4258,24 @@ inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
return true;
}
}
+ case InstArithmetic::Shl: {
+ uint32_t ShiftAmount = Const->getValue();
+ switch (ShiftAmount) {
+ case 1:
Jim Stichnoth
2015/10/27 13:57:10
Might as well add "case 0:" for symmetry with above.
sehr
2015/10/27 21:48:00
Done.

+ case 2:
+ case 3:
+ break;
+ default:
+ return false;
+ }
+ if (Shift + ShiftAmount <= 3) {
+ Index = Var;
+ Shift += ShiftAmount;
+ Reason = IndexInst;
+ return true;
+ }
+ }
+ }
}
}
}
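
The new Shl arm exists because an x86 memory operand can scale its index only
by 1, 2, 4, or 8, i.e. by 1 << Shift with Shift at most 3. A small sketch of
the folding rule (illustrative only; foldShiftIntoScale is a hypothetical
name, not the patch's code):

  #include <cassert>
  #include <cstdint>

  // Fold "Index = Var << ShiftAmount" into the address's scale, as long as
  // the accumulated shift still fits an x86 scale of 1/2/4/8.
  static bool foldShiftIntoScale(uint16_t &Shift, uint32_t ShiftAmount) {
    if (ShiftAmount > 3 || Shift + ShiftAmount > 3)
      return false; // would require a scale larger than 8
    Shift += ShiftAmount;
    return true;
  }

  int main() {
    uint16_t Shift = 1;                    // index currently scaled by 2
    assert(foldShiftIntoScale(Shift, 2));  // (x << 1) << 2 folds to scale 8
    assert(!foldShiftIntoScale(Shift, 1)); // a scale of 16 does not exist
    return 0;
  }
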
@@ -4236,16 +4283,19 @@ inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
}
inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
- int32_t &Offset, const Inst *&Reason) {
+ ConstantRelocatable *&Relocatable, int32_t &Offset,
+ const Inst *&Reason) {
// Base is Base=Var+Const || Base is Base=Const+Var ==>
// set Base=Var, Offset+=Const
// Base is Base=Var-Const ==>
// set Base=Var, Offset-=Const
- if (Base == nullptr)
+ if (Base == nullptr) {
return false;
+ }
const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
- if (BaseInst == nullptr)
+ if (BaseInst == nullptr) {
return false;
+ }
assert(!VMetadata->isMultiDef(Base));
if (const InstArithmetic *ArithInst =
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.

llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
@@ -4253,32 +4303,76 @@ inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
ArithInst->getOp() != InstArithmetic::Sub)
return false;
bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
- Variable *Var = nullptr;
- ConstantInteger32 *Const = nullptr;
- if (Variable *VariableOperand =
- llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
- Var = VariableOperand;
- Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
- } else if (IsAdd) {
- Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
- Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
- }
- if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
+ Operand *Src0 = ArithInst->getSrc(0);
+ Operand *Src1 = ArithInst->getSrc(1);
+ Variable *Var0 = llvm::dyn_cast<Variable>(Src0);
Jim Stichnoth
2015/10/27 05:52:34
auto * for all of these
sehr
2015/10/27 21:48:00
Done.

+ Variable *Var1 = llvm::dyn_cast<Variable>(Src1);
+ ConstantInteger32 *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
+ ConstantInteger32 *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
+ ConstantRelocatable *Reloc0 = llvm::dyn_cast<ConstantRelocatable>(Src0);
+ ConstantRelocatable *Reloc1 = llvm::dyn_cast<ConstantRelocatable>(Src1);
+ Variable *NewBase = nullptr;
+ int32_t NewOffset = Offset;
+ ConstantRelocatable *NewRelocatable = Relocatable;
+ if ((Var0 != nullptr) && (Var1 != nullptr))
Jim Stichnoth
2015/10/27 05:52:34
In these cases, I think omitting the "!= nullptr"
sehr
2015/10/27 21:48:00
Done.

+ // TODO(sehr): merge base/index splitting into here.
return false;
- int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
- if (Utils::WouldOverflowAdd(Offset, MoreOffset))
+ else if (!IsAdd && Var1 != nullptr)
return false;
- Base = Var;
- Offset += MoreOffset;
+ else if (Var0 != nullptr)
+ NewBase = Var0;
+ else if (Var1 != nullptr)
+ NewBase = Var1;
+ // Don't know how to add/subtract two relocatables.
+ if (((Relocatable != nullptr) &&
+ ((Reloc0 != nullptr) || (Reloc1 != nullptr))) ||
+ ((Reloc0 != nullptr) && (Reloc1 != nullptr)))
+ return false;
+ // Don't know how to subtract a relocatable.
+ if (!IsAdd && (Reloc1 != nullptr))
+ return false;
+ // Incorporate ConstantRelocatables.
+ if (Reloc0 != nullptr)
+ NewRelocatable = Reloc0;
+ else if (Reloc1 != nullptr)
+ NewRelocatable = Reloc1;
+ // Compute the updated constant offset.
+ if (Const0 != nullptr) {
+ int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
+ if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
Jim Stichnoth
2015/10/27 05:52:34
Same 32-bit overflow question as above.
sehr
2015/10/27 21:48:00
Same answer, sadly.

+ return false;
+ NewOffset += MoreOffset;
+ }
+ if (Const1 != nullptr) {
+ int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
+ if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
+ return false;
+ NewOffset += MoreOffset;
+ }
+ // Update the computed address parameters once we are sure optimization
+ // is valid.
+ Base = NewBase;
+ Offset = NewOffset;
+ Relocatable = NewRelocatable;
Reason = BaseInst;
return true;
}
return false;
}
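
A standalone sketch (hypothetical name canFoldReloc, not the Subzero API) of
the relocatable rules the rewritten matchOffsetBase enforces: a displacement
can name at most one symbol, two symbols cannot be combined, and a symbol can
only be added, never subtracted:

  // Returns whether an add/sub whose operands include the given relocatables
  // can still be folded into an address that may already carry a symbol.
  static bool canFoldReloc(bool HaveSymbolAlready, bool IsAdd,
                           bool Src0IsReloc, bool Src1IsReloc) {
    if (Src0IsReloc && Src1IsReloc)
      return false; // @a + @b has no single-symbol encoding
    if (HaveSymbolAlready && (Src0IsReloc || Src1IsReloc))
      return false; // the address already carries a symbol
    if (!IsAdd && Src1IsReloc)
      return false; // x - @sym is not representable as a displacement
    return true;
  }

  int main() {
    return (canFoldReloc(false, true, true, false) &&  // x = @g + y
            !canFoldReloc(true, true, true, false) &&  // a second symbol
            !canFoldReloc(false, false, false, true))  // x = y - @g
               ? 0
               : 1;
  }
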
-inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
- Variable *&Index, uint16_t &Shift,
- int32_t &Offset) {
+// Builds information for a canonical address expression:
+// <Relocatable + Offset>(Base, Index, Shift)
+// On entry:
+// Relocatable == null,
+// Offset == 0,
+// Base is a Variable,
+// Index == nullptr,
+// Shift == 0
+inline bool computeAddressOpt(Cfg *Func, const Inst *Instr,
+ ConstantRelocatable *&Relocatable,
+ int32_t &Offset, Variable *&Base,
+ Variable *&Index, uint16_t &Shift) {
+ bool AddressWasOptimized = false;
Func->resetCurrentNode();
if (Func->isVerbose(IceV_AddrOpt)) {
OstreamLocker L(Func->getContext());
@@ -4286,54 +4380,75 @@ inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
Str << "\nStarting computeAddressOpt for instruction:\n ";
Instr->dumpDecorated(Func);
}
- (void)Offset; // TODO: pattern-match for non-zero offsets.
if (Base == nullptr)
- return;
+ return AddressWasOptimized;
// If the Base has more than one use or is live across multiple blocks, then
// don't go further. Alternatively (?), never consider a transformation that
// would change a variable that is currently *not* live across basic block
// boundaries into one that *is*.
if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
- return;
+ return AddressWasOptimized;
const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck();
const VariablesMetadata *VMetadata = Func->getVMetadata();
- bool Continue = true;
- while (Continue) {
- const Inst *Reason = nullptr;
- if (matchTransitiveAssign(VMetadata, Base, Reason) ||
- matchTransitiveAssign(VMetadata, Index, Reason) ||
- (!MockBounds &&
- matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason)) ||
- (!MockBounds && matchShiftedIndex(VMetadata, Index, Shift, Reason)) ||
- matchOffsetBase(VMetadata, Base, Offset, Reason)) {
- dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
- } else {
- Continue = false;
+ const Inst *Reason = nullptr;
+ do {
+ if (Reason) {
+ dumpAddressOpt(Func, Relocatable, Offset, Base, Index, Shift, Reason);
+ AddressWasOptimized = true;
+ Reason = nullptr;
}
+ // Update Base and Index to follow through assignments to definitions.
+ if (matchAssign(VMetadata, Base, Relocatable, Offset, Reason)) {
+ // Assignments of Base from a Relocatable or ConstantInt32 can result
+ // in Base becoming nullptr. To avoid code duplication in this loop we
+ // prefer that Base be non-nullptr if possible.
+ if ((Base == nullptr) && (Index != nullptr) && (Shift == 0))
+ std::swap(Base, Index);
+ continue;
+ }
+ if (matchAssign(VMetadata, Index, Relocatable, Offset, Reason))
+ continue;
- // Index is Index=Var<<Const && Const+Shift<=3 ==>
- // Index=Var, Shift+=Const
-
- // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
- // Index=Var, Shift+=log2(Const)
-
- // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
- // swap(Index,Base)
- // Similar for Base=Const*Var and Base=Var<<Const
-
+ if (!MockBounds) {
+ // Transition from:
+ // <Relocatable + Offset>(Base) to
+ // <Relocatable + Offset>(Base, Index)
+ if (matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason))
+ continue;
+ // Recognize multiply/shift and update Shift amount.
+ // Index becomes Index=Var<<Const && Const+Shift<=3 ==>
+ // Index=Var, Shift+=Const
+ // Index becomes Index=Const*Var && log2(Const)+Shift<=3 ==>
+ // Index=Var, Shift+=log2(Const)
+ if (matchShiftedIndex(VMetadata, Index, Shift, Reason))
+ continue;
+ // If Shift is zero, the choice of Base and Index was purely arbitrary.
+ // Recognize multiply/shift and set Shift amount.
+ // Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
+ // swap(Index,Base)
+ // Similar for Base=Const*Var and Base=Var<<Const
+ if ((Shift == 0) && matchShiftedIndex(VMetadata, Base, Shift, Reason)) {
+ std::swap(Base, Index);
+ continue;
+ }
+ }
+ // Update Offset to reflect additions/subtractions with constants and
+ // relocatables.
+ // TODO: consider overflow issues with respect to Offset.
+ // TODO: handle symbolic constants.
+ if (matchOffsetBase(VMetadata, Base, Relocatable, Offset, Reason))
+ continue;
+ // TODO(sehr, stichnot): Handle updates of Index with Shift != 0.
// Index is Index=Var+Const ==>
// set Index=Var, Offset+=(Const<<Shift)
-
// Index is Index=Const+Var ==>
// set Index=Var, Offset+=(Const<<Shift)
-
// Index is Index=Var-Const ==>
// set Index=Var, Offset-=(Const<<Shift)
-
- // TODO: consider overflow issues with respect to Offset.
- // TODO: handle symbolic constants.
- }
+ break;
+ } while (Reason);
+ return AddressWasOptimized;
}
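
As a worked illustration of the loop above (a hypothetical chain: the names
t0-t3, i, and @array are made up, and every intermediate is assumed to be
singly defined and used within one basic block), starting from Base = t3:

  t0 = @array          ; matchAssign absorbs the symbol into Relocatable
  t1 = i << 2          ; matchShiftedIndex folds the shift into Shift = 2
  t2 = t0 + t1         ; matchCombinedBaseIndex splits Base and Index
  t3 = t2 + 8          ; matchOffsetBase folds the constant into Offset
  ... = load t3

the iteration reaches the canonical form <@array + 8>(Base, Index, Shift) with
no base register, Index = i, and Shift = 2, i.e. a single scaled-index memory
operand with a symbolic displacement.
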
/// Add a mock bounds check on the memory address before using it as a load or
@@ -4415,8 +4530,9 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
Variable *Dest = Inst->getDest();
Operand *Addr = Inst->getSrc(0);
Variable *Index = nullptr;
+ ConstantRelocatable *Relocatable = nullptr;
uint16_t Shift = 0;
- int32_t Offset = 0; // TODO: make Constant
+ int32_t Offset = 0;
// Vanilla ICE load instructions should not use the segment registers, and
// computeAddressOpt only works at the level of Variables and Constants, not
// other Traits::X86OperandMem, so there should be no mention of segment
@@ -4424,10 +4540,16 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
Traits::X86OperandMem::DefaultSegment;
Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.

- computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
- if (Base && Addr != Base) {
+ if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
Inst->setDeleted();
- Constant *OffsetOp = Ctx->getConstantInt32(Offset);
+ Constant *OffsetOp = nullptr;
+ if (Relocatable == nullptr) {
+ OffsetOp = Ctx->getConstantInt32(Offset);
+ } else {
+ OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset,
+ Relocatable->getName(),
+ Relocatable->getSuppressMangling());
+ }
Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
Index, Shift, SegmentReg);
Context.insert(InstLoad::create(Func, Dest, Addr));
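
When a Relocatable was absorbed, the displacement handed to X86OperandMem is
rebuilt as a single symbol constant whose offset is the symbol's own offset
plus the accumulated Offset; for example (hypothetical values), a Relocatable
of @g + 4 together with Offset = 8 produces the operand

  @g + 12(Base, Index, Shift)

doAddressOptStore below builds its OffsetOp the same way.
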
@@ -4623,8 +4745,9 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
Operand *Data = Inst->getData();
Operand *Addr = Inst->getAddr();
Variable *Index = nullptr;
+ ConstantRelocatable *Relocatable = nullptr;
uint16_t Shift = 0;
- int32_t Offset = 0; // TODO: make Constant
+ int32_t Offset = 0;
Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:01
Done.

// Vanilla ICE store instructions should not use the segment registers, and
// computeAddressOpt only works at the level of Variables and Constants, not
@@ -4632,10 +4755,16 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
// registers there either.
const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
Traits::X86OperandMem::DefaultSegment;
- computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
- if (Base && Addr != Base) {
+ if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
Inst->setDeleted();
- Constant *OffsetOp = Ctx->getConstantInt32(Offset);
+ Constant *OffsetOp = nullptr;
+ if (Relocatable == nullptr) {
+ OffsetOp = Ctx->getConstantInt32(Offset);
+ } else {
+ OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset,
+ Relocatable->getName(),
+ Relocatable->getSuppressMangling());
+ }
Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
Index, Shift, SegmentReg);
InstStore *NewStore = InstStore::create(Func, Data, Addr);