Chromium Code Reviews
| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 8739c77ac8e99351c19df3736faed4789cbc5be1..0e4482ac89d4a4b335ca03e7e8b671447d5e542d 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -2851,15 +2851,19 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| break; |
| case InstIcmp::Eq: |
| case InstIcmp::Ule: |
| - _mov(Temp, Src0LoRM); |
| - _or(Temp, Src0HiRM); |
| + // Mov Src0HiRM first, because it was legalized most recently, |
|
Jim Stichnoth
2015/10/27 05:52:34
Can you reformat this and other comments to 80-column width?
sehr
2015/10/27 21:48:01
Done.
|
| + // and will sometimes avoid a move before the OR. |
| + _mov(Temp, Src0HiRM); |
| + _or(Temp, Src0LoRM); |
| Context.insert(InstFakeUse::create(Func, Temp)); |
| setccOrBr(Traits::Cond::Br_e, Dest, Br); |
| return; |
| case InstIcmp::Ne: |
| case InstIcmp::Ugt: |
| - _mov(Temp, Src0LoRM); |
| - _or(Temp, Src0HiRM); |
| + // Mov Src0HiRM first, because it was legalized most recently, |
| + // and will sometimes avoid a move before the OR. |
| + _mov(Temp, Src0HiRM); |
| + _or(Temp, Src0LoRM); |
| Context.insert(InstFakeUse::create(Func, Temp)); |
| setccOrBr(Traits::Cond::Br_ne, Dest, Br); |
| return; |
| @@ -4101,11 +4105,15 @@ inline bool isAdd(const Inst *Inst) { |
| return false; |
| } |
| -inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| +inline void dumpAddressOpt(const Cfg *Func, |
| + const ConstantRelocatable *Relocatable, |
| + int32_t Offset, const Variable *Base, |
| const Variable *Index, uint16_t Shift, |
| - int32_t Offset, const Inst *Reason) { |
| + const Inst *Reason) { |
| + /* |
|
Jim Stichnoth
2015/10/27 05:52:34
Why is this commented out?
sehr
2015/10/27 21:48:01
Done.
|
| if (!BuildDefs::dump()) |
| return; |
| + */ |
| if (!Func->isVerbose(IceV_AddrOpt)) |
| return; |
| OstreamLocker L(Func->getContext()); |
| @@ -4122,11 +4130,13 @@ inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| Index->dump(Func); |
| else |
| Str << "<null>"; |
| - Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n"; |
| + Str << ", Shift=" << Shift << ", Offset=" << Offset |
| + << ", Relocatable=" << Relocatable << "\n"; |
| } |
| -inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, |
| - Variable *&Var, const Inst *&Reason) { |
| +inline bool matchAssign(const VariablesMetadata *VMetadata, Variable *&Var, |
| + ConstantRelocatable *&Relocatable, int32_t &Offset, |
| + const Inst *&Reason) { |
| // Var originates from Var=SrcVar ==> set Var:=SrcVar |
| if (Var == nullptr) |
| return false; |
| @@ -4143,6 +4153,21 @@ inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, |
| Reason = VarAssign; |
| return true; |
| } |
| + } else if (auto Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) { |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.
|
| + int32_t MoreOffset = Const->getValue(); |
| + if (Utils::WouldOverflowAdd(Offset, MoreOffset)) |
|
Jim Stichnoth
2015/10/27 05:52:34
Is overflow an issue for x86-32? Can it be disallowed?
sehr
2015/10/27 21:48:01
X86-64 immediates are also 32 bits.
|
| + return false; |
| + Var = nullptr; |
| + Offset += MoreOffset; |
| + Reason = VarAssign; |
| + return true; |
| + } else if (auto AddReloc = llvm::dyn_cast<ConstantRelocatable>(SrcOp)) { |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.
|
| + if (Relocatable == nullptr) { |
| + Var = nullptr; |
| + Relocatable = AddReloc; |
| + Reason = VarAssign; |
| + return true; |
| + } |
| } |
| } |
| } |
| @@ -4202,8 +4227,12 @@ inline bool matchShiftedIndex(const VariablesMetadata *VMetadata, |
| if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:01
Done.
|
| if (ConstantInteger32 *Const = |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:01
Done.
|
| llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) { |
| - if (ArithInst->getOp() == InstArithmetic::Mul && |
| - !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) { |
| + if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32) |
| + return false; |
| + switch (ArithInst->getOp()) { |
| + default: |
| + return false; |
| + case InstArithmetic::Mul: { |
| uint64_t Mult = Const->getValue(); |
|
Jim Stichnoth
2015/10/27 13:57:10
Maybe this ought to be uint32_t, given the dyn_cast<ConstantInteger32> above.
sehr
2015/10/27 21:48:00
Done.
|
| uint32_t LogMult; |
| switch (Mult) { |
| @@ -4229,6 +4258,24 @@ inline bool matchShiftedIndex(const VariablesMetadata *VMetadata, |
| return true; |
| } |
| } |
| + case InstArithmetic::Shl: { |
| + uint32_t ShiftAmount = Const->getValue(); |
| + switch (ShiftAmount) { |
| + case 1: |
|
Jim Stichnoth
2015/10/27 13:57:10
Might as well add "case 0:" for symmetry with the cases above.
sehr
2015/10/27 21:48:00
Done.
|
| + case 2: |
| + case 3: |
| + break; |
| + default: |
| + return false; |
| + } |
| + if (Shift + ShiftAmount <= 3) { |
| + Index = Var; |
| + Shift += ShiftAmount; |
| + Reason = IndexInst; |
| + return true; |
| + } |
| + } |
| + } |
| } |
| } |
| } |
| @@ -4236,16 +4283,19 @@ inline bool matchShiftedIndex(const VariablesMetadata *VMetadata, |
| } |
| inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base, |
| - int32_t &Offset, const Inst *&Reason) { |
| + ConstantRelocatable *&Relocatable, int32_t &Offset, |
| + const Inst *&Reason) { |
| // Base is Base=Var+Const || Base is Base=Const+Var ==> |
| // set Base=Var, Offset+=Const |
| // Base is Base=Var-Const ==> |
| // set Base=Var, Offset-=Const |
| - if (Base == nullptr) |
| + if (Base == nullptr) { |
| return false; |
| + } |
| const Inst *BaseInst = VMetadata->getSingleDefinition(Base); |
| - if (BaseInst == nullptr) |
| + if (BaseInst == nullptr) { |
| return false; |
| + } |
| assert(!VMetadata->isMultiDef(Base)); |
| if (const InstArithmetic *ArithInst = |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.
|
| llvm::dyn_cast<const InstArithmetic>(BaseInst)) { |
| @@ -4253,32 +4303,76 @@ inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base, |
| ArithInst->getOp() != InstArithmetic::Sub) |
| return false; |
| bool IsAdd = ArithInst->getOp() == InstArithmetic::Add; |
| - Variable *Var = nullptr; |
| - ConstantInteger32 *Const = nullptr; |
| - if (Variable *VariableOperand = |
| - llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { |
| - Var = VariableOperand; |
| - Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1)); |
| - } else if (IsAdd) { |
| - Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0)); |
| - Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1)); |
| - } |
| - if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var)) |
| + Operand *Src0 = ArithInst->getSrc(0); |
| + Operand *Src1 = ArithInst->getSrc(1); |
| + Variable *Var0 = llvm::dyn_cast<Variable>(Src0); |
|
Jim Stichnoth
2015/10/27 05:52:34
auto * for all of these
sehr
2015/10/27 21:48:00
Done.
|
| + Variable *Var1 = llvm::dyn_cast<Variable>(Src1); |
| + ConstantInteger32 *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0); |
| + ConstantInteger32 *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1); |
| + ConstantRelocatable *Reloc0 = llvm::dyn_cast<ConstantRelocatable>(Src0); |
| + ConstantRelocatable *Reloc1 = llvm::dyn_cast<ConstantRelocatable>(Src1); |
| + Variable *NewBase = nullptr; |
| + int32_t NewOffset = Offset; |
| + ConstantRelocatable *NewRelocatable = Relocatable; |
| + if ((Var0 != nullptr) && (Var1 != nullptr)) |
|
Jim Stichnoth
2015/10/27 05:52:34
In these cases, I think omitting the "!= nullptr" comparison reads better.
sehr
2015/10/27 21:48:00
Done.
|
| + // TODO(sehr): merge base/index splitting into here. |
| return false; |
| - int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue(); |
| - if (Utils::WouldOverflowAdd(Offset, MoreOffset)) |
| + else if (!IsAdd && Var1 != nullptr) |
| return false; |
| - Base = Var; |
| - Offset += MoreOffset; |
| + else if (Var0 != nullptr) |
| + NewBase = Var0; |
| + else if (Var1 != nullptr) |
| + NewBase = Var1; |
| + // Don't know how to add/subtract two relocatables. |
| + if (((Relocatable != nullptr) && |
| + ((Reloc0 != nullptr) || (Reloc1 != nullptr))) || |
| + ((Reloc0 != nullptr) && (Reloc1 != nullptr))) |
| + return false; |
| + // Don't know how to subtract a relocatable. |
| + if (!IsAdd && (Reloc1 != nullptr)) |
| + return false; |
| + // Incorporate ConstantRelocatables. |
| + if (Reloc0 != nullptr) |
| + NewRelocatable = Reloc0; |
| + else if (Reloc1 != nullptr) |
| + NewRelocatable = Reloc1; |
| + // Compute the updated constant offset. |
| + if (Const0 != nullptr) { |
| + int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue(); |
| + if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) |
|
Jim Stichnoth
2015/10/27 05:52:34
Same 32-bit overflow question as above.
sehr
2015/10/27 21:48:00
Same answer, sadly.
|
| + return false; |
| + NewOffset += MoreOffset; |
| + } |
| + if (Const1 != nullptr) { |
| + int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue(); |
| + if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) |
| + return false; |
| + NewOffset += MoreOffset; |
| + } |
| + // Update the computed address parameters once we are sure optimization |
| + // is valid. |
| + Base = NewBase; |
| + Offset = NewOffset; |
| + Relocatable = NewRelocatable; |
| Reason = BaseInst; |
| return true; |
| } |
| return false; |
| } |
| -inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base, |
| - Variable *&Index, uint16_t &Shift, |
| - int32_t &Offset) { |
| +// Builds information for a canonical address expression: |
| +// <Relocatable + Offset>(Base, Index, Shift) |
| +// On entry: |
| +// Relocatable == null, |
| +// Offset == 0, |
| +// Base is a Variable, |
| +// Index == nullptr, |
| +// Shift == 0 |
| +inline bool computeAddressOpt(Cfg *Func, const Inst *Instr, |
| + ConstantRelocatable *&Relocatable, |
| + int32_t &Offset, Variable *&Base, |
| + Variable *&Index, uint16_t &Shift) { |
| + bool AddressWasOptimized = false; |
| Func->resetCurrentNode(); |
| if (Func->isVerbose(IceV_AddrOpt)) { |
| OstreamLocker L(Func->getContext()); |
| @@ -4286,54 +4380,75 @@ inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base, |
| Str << "\nStarting computeAddressOpt for instruction:\n "; |
| Instr->dumpDecorated(Func); |
| } |
| - (void)Offset; // TODO: pattern-match for non-zero offsets. |
| if (Base == nullptr) |
| - return; |
| + return AddressWasOptimized; |
| // If the Base has more than one use or is live across multiple blocks, then |
| // don't go further. Alternatively (?), never consider a transformation that |
| // would change a variable that is currently *not* live across basic block |
| // boundaries into one that *is*. |
| if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) |
| - return; |
| + return AddressWasOptimized; |
| const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck(); |
| const VariablesMetadata *VMetadata = Func->getVMetadata(); |
| - bool Continue = true; |
| - while (Continue) { |
| - const Inst *Reason = nullptr; |
| - if (matchTransitiveAssign(VMetadata, Base, Reason) || |
| - matchTransitiveAssign(VMetadata, Index, Reason) || |
| - (!MockBounds && |
| - matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason)) || |
| - (!MockBounds && matchShiftedIndex(VMetadata, Index, Shift, Reason)) || |
| - matchOffsetBase(VMetadata, Base, Offset, Reason)) { |
| - dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason); |
| - } else { |
| - Continue = false; |
| + const Inst *Reason = nullptr; |
| + do { |
| + if (Reason) { |
| + dumpAddressOpt(Func, Relocatable, Offset, Base, Index, Shift, Reason); |
| + AddressWasOptimized = true; |
| + Reason = nullptr; |
| } |
| + // Update Base and Index to follow through assignments to definitions. |
| + if (matchAssign(VMetadata, Base, Relocatable, Offset, Reason)) { |
| + // Assignments of Base from a Relocatable or ConstantInt32 can result |
| + // in Base becoming nullptr. To avoid code duplication in this loop we |
| + // prefer that Base be non-nullptr if possible. |
| + if ((Base == nullptr) && (Index != nullptr) && (Shift == 0)) |
| + std::swap(Base, Index); |
| + continue; |
| + } |
| + if (matchAssign(VMetadata, Index, Relocatable, Offset, Reason)) |
| + continue; |
| - // Index is Index=Var<<Const && Const+Shift<=3 ==> |
| - // Index=Var, Shift+=Const |
| - |
| - // Index is Index=Const*Var && log2(Const)+Shift<=3 ==> |
| - // Index=Var, Shift+=log2(Const) |
| - |
| - // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==> |
| - // swap(Index,Base) |
| - // Similar for Base=Const*Var and Base=Var<<Const |
| - |
| + if (!MockBounds) { |
| + // Transition from: |
| + // <Relocatable + Offset>(Base) to |
| + // <Relocatable + Offset>(Base, Index) |
| + if (matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason)) |
| + continue; |
| + // Recognize multiply/shift and update Shift amount. |
| + // Index becomes Index=Var<<Const && Const+Shift<=3 ==> |
| + // Index=Var, Shift+=Const |
| + // Index becomes Index=Const*Var && log2(Const)+Shift<=3 ==> |
| + // Index=Var, Shift+=log2(Const) |
| + if (matchShiftedIndex(VMetadata, Index, Shift, Reason)) |
| + continue; |
| + // If Shift is zero, the choice of Base and Index was purely arbitrary. |
| + // Recognize multiply/shift and set Shift amount. |
| + // Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==> |
| + // swap(Index,Base) |
| + // Similar for Base=Const*Var and Base=Var<<Const |
| + if ((Shift == 0) && matchShiftedIndex(VMetadata, Base, Shift, Reason)) { |
| + std::swap(Base, Index); |
| + continue; |
| + } |
| + } |
| + // Update Offset to reflect additions/subtractions with constants and |
| + // relocatables. |
| + // TODO: consider overflow issues with respect to Offset. |
| + // TODO: handle symbolic constants. |
| + if (matchOffsetBase(VMetadata, Base, Relocatable, Offset, Reason)) |
| + continue; |
| + // TODO(sehr, stichnot): Handle updates of Index with Shift != 0. |
| // Index is Index=Var+Const ==> |
| // set Index=Var, Offset+=(Const<<Shift) |
| - |
| // Index is Index=Const+Var ==> |
| // set Index=Var, Offset+=(Const<<Shift) |
| - |
| // Index is Index=Var-Const ==> |
| // set Index=Var, Offset-=(Const<<Shift) |
| - |
| - // TODO: consider overflow issues with respect to Offset. |
| - // TODO: handle symbolic constants. |
| - } |
| + break; |
| + } while (Reason); |
| + return AddressWasOptimized; |
| } |
| /// Add a mock bounds check on the memory address before using it as a load or |
| @@ -4415,8 +4530,9 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { |
| Variable *Dest = Inst->getDest(); |
| Operand *Addr = Inst->getSrc(0); |
| Variable *Index = nullptr; |
| + ConstantRelocatable *Relocatable = nullptr; |
| uint16_t Shift = 0; |
| - int32_t Offset = 0; // TODO: make Constant |
| + int32_t Offset = 0; |
| // Vanilla ICE load instructions should not use the segment registers, and |
| // computeAddressOpt only works at the level of Variables and Constants, not |
| // other Traits::X86OperandMem, so there should be no mention of segment |
| @@ -4424,10 +4540,16 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { |
| const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = |
| Traits::X86OperandMem::DefaultSegment; |
| Variable *Base = llvm::dyn_cast<Variable>(Addr); |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:00
Done.
|
| - computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| - if (Base && Addr != Base) { |
| + if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { |
| Inst->setDeleted(); |
| - Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| + Constant *OffsetOp = nullptr; |
| + if (Relocatable == nullptr) { |
| + OffsetOp = Ctx->getConstantInt32(Offset); |
| + } else { |
| + OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
| + Relocatable->getName(), |
| + Relocatable->getSuppressMangling()); |
| + } |
| Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, |
| Index, Shift, SegmentReg); |
| Context.insert(InstLoad::create(Func, Dest, Addr)); |
| @@ -4623,8 +4745,9 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { |
| Operand *Data = Inst->getData(); |
| Operand *Addr = Inst->getAddr(); |
| Variable *Index = nullptr; |
| + ConstantRelocatable *Relocatable = nullptr; |
| uint16_t Shift = 0; |
| - int32_t Offset = 0; // TODO: make Constant |
| + int32_t Offset = 0; |
| Variable *Base = llvm::dyn_cast<Variable>(Addr); |
|
Jim Stichnoth
2015/10/27 05:52:34
auto *
sehr
2015/10/27 21:48:01
Done.
|
| // Vanilla ICE store instructions should not use the segment registers, and |
| // computeAddressOpt only works at the level of Variables and Constants, not |
| @@ -4632,10 +4755,16 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { |
| // registers there either. |
| const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = |
| Traits::X86OperandMem::DefaultSegment; |
| - computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| - if (Base && Addr != Base) { |
| + if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { |
| Inst->setDeleted(); |
| - Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| + Constant *OffsetOp = nullptr; |
| + if (Relocatable == nullptr) { |
| + OffsetOp = Ctx->getConstantInt32(Offset); |
| + } else { |
| + OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
| + Relocatable->getName(), |
| + Relocatable->getSuppressMangling()); |
| + } |
| Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, |
| Index, Shift, SegmentReg); |
| InstStore *NewStore = InstStore::create(Func, Data, Addr); |